[clang] [llvm] [SVE2.1][Clang][LLVM]Add 128bits builtin in Clang and LLVM intrinsic (PR #71930)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 20 09:47:58 PST 2023
https://github.com/CarolineConcatto updated https://github.com/llvm/llvm-project/pull/71930
>From 0bf30aec802f6fe3d6cd74b16d00cb8db0d3c1b6 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Fri, 10 Nov 2023 11:00:49 +0000
Subject: [PATCH 1/4] [SVE2.1][Clang][LLVM]Add 128bits builtin in Clang and
LLVM intrinsic
This patch implements the builtins in Clang
and the LLVM-IR intrinsic for the following:
EXTQ
// Variants are also available for:
// _s8, _s16, _u16, _s32, _u32, _s64, _u64
// _bf16, _f16, _f32, _f64
svuint8_t svextq_lane[_u8](svuint8_t zdn, svuint8_t zm, uint64_t imm);
TBLQ and TBXQ
// Variants are also available for:
// _u8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svint8_t svtblq[_s8](svint8_t zn, svuint8_t zm);
svint8_t svtbxq[_s8](svint8_t fallback, svint8_t zn, svuint8_t zm);
UZPQ1, UZPQ2, ZIPQ1 and ZIPQ2
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svuint8_t svuzpq1[_u8](svuint8_t zn, svuint8_t zm);
svuint8_t svuzpq2[_u8](svuint8_t zn, svuint8_t zm);
svuint8_t svzipq1[_u8](svuint8_t zn, svuint8_t zm);
svuint8_t svzipq2[_u8](svuint8_t zn, svuint8_t zm);
PMOV
// Variants are available for:
// _s8, _u16, _s16, _s32, _u32, _s64, _u64
svbool_t svpmov_lane[_u8](svuint8_t zn, uint64_t imm);
svbool_t svpmov[_u8](svuint8_t zn); // The immediate is zero
svuint8_t svpmov_u8_z(svbool_t pn); // The immediate is zero
// Variants are available for:
// _s16, _s32, _u32, _s64, _u64
svuint16_t svpmov_lane[_u16]_m(svuint16_t zd, svbool_t pn, uint64_t imm);
These intrinsics follow the ACLE specification proposed in PR #257 [1].
[1] ARM-software/acle#257
Co-authored-by: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
---
clang/include/clang/Basic/arm_sve.td | 33 ++
clang/include/clang/Basic/arm_sve_sme_incl.td | 3 +
clang/lib/Sema/SemaChecking.cpp | 12 +
.../acle_sve2p1_extq.c | 213 ++++++++++++
.../acle_sve2p1_pmov_to_pred.c | 304 ++++++++++++++++++
.../acle_sve2p1_pmov_to_vector.c | 276 ++++++++++++++++
.../acle_sve2p1_tblq.c | 214 ++++++++++++
.../acle_sve2p1_tbxq.c | 214 ++++++++++++
.../acle_sve2p1_uzpq1.c | 217 +++++++++++++
.../acle_sve2p1_uzpq2.c | 216 +++++++++++++
.../acle_sve2p1_zipq1.c | 217 +++++++++++++
.../acle_sve2p1_zipq2.c | 217 +++++++++++++
.../acle_sve2p1_imm.cpp | 6 +
llvm/include/llvm/IR/IntrinsicsAArch64.td | 41 +++
.../lib/Target/AArch64/AArch64InstrFormats.td | 33 ++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 18 +-
llvm/lib/Target/AArch64/SVEInstrFormats.td | 86 ++++-
.../CodeGen/AArch64/sve2p1-intrinsics-extq.ll | 83 +++++
.../AArch64/sve2p1-intrinsics-pmov-to-pred.ll | 121 +++++++
.../sve2p1-intrinsics-pmov-to-vector.ll | 117 +++++++
.../CodeGen/AArch64/sve2p1-intrinsics-tblq.ll | 83 +++++
.../CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll | 83 +++++
.../AArch64/sve2p1-intrinsics-uzpq1.ll | 85 +++++
.../AArch64/sve2p1-intrinsics-uzpq2.ll | 85 +++++
.../AArch64/sve2p1-intrinsics-zipq1.ll | 85 +++++
.../AArch64/sve2p1-intrinsics-zipq2.ll | 85 +++++
26 files changed, 3132 insertions(+), 15 deletions(-)
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 3d4c2129565903d..c377a0b89c1d591 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1992,3 +1992,36 @@ let TargetGuard = "sme2" in {
def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
}
+
+let TargetGuard = "sve2p1" in {
+ // ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
+ def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq1", [], []>;
+ def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq2", [], []>;
+ def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq1", [], []>;
+ def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq2", [], []>;
+ // TBLQ, TBXQ
+ def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">;
+ def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
+ // EXTQ
+ def EXTQ : SInst<"svextq_lane[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq_lane", [], [ImmCheck<2, ImmCheck0_15>]>;
+ // PMOV
+ // Move to Pred
+ multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
+ def _LANE : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
+ def _ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
+ }
+ defm SVPMOV_B_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
+ defm SVPMOV_H_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
+ defm SVPMOV_S_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
+ defm SVPMOV_D_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
+
+ // Move to Vector
+ multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
+ def _M : SInst<name # "_lane[_{d}]", "ddPk", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
+ def _Z : SInst<name # "_{d}_z", "dP", types, MergeNone, intrinsic # "_zeroing", flags, []>;
+ }
+ def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP", "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [], []>;
+ defm SVPMOV_TO_VEC_LANE_H : PMOV_TO_VEC<"svpmov", "sUs", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_1>;
+ defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_3>;
+ defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>;
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 22a2a3c5434d657..21dac067ab66e61 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -249,6 +249,9 @@ def ImmCheck0_0 : ImmCheckType<16>; // 0..0
def ImmCheck0_15 : ImmCheckType<17>; // 0..15
def ImmCheck0_255 : ImmCheckType<18>; // 0..255
def ImmCheck2_4_Mul2 : ImmCheckType<19>; // 2, 4
+def ImmCheck1_1 : ImmCheckType<20>; // 1..1
+def ImmCheck1_3 : ImmCheckType<21>; // 1..3
+def ImmCheck1_7 : ImmCheckType<22>; // 1..7
class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
int Arg = arg;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ae588db02bbe722..9dfff132cd88db3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3052,6 +3052,18 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck1_1:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_3:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_7:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
+ HasError = true;
+ break;
case SVETypeFlags::ImmCheckExtract:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
(2048 / ElementSizeInBits) - 1))
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
new file mode 100644
index 000000000000000..c49f8c838ace373
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -0,0 +1,213 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svextq_lane_u8(svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u8,,)(zn, zm, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svextq_lane_s8(svint8_t zn, svint8_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s8,,)(zn, zm, 4);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svextq_lane_u16(svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u16,,)(zn, zm, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svextq_lane_s16(svint16_t zn, svint16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s16,,)(zn, zm, 5);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svextq_lane_u32(svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u32,,)(zn, zm, 2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svextq_lane_s32(svint32_t zn, svint32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s32,,)(zn, zm, 6);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svextq_lane_u64(svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u64,,)(zn, zm, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svextq_lane_s64(svint64_t zn, svint64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s64,,)(zn, zm, 7);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_lane_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svextq_lane_f16(svfloat16_t zn, svfloat16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f16,,)(zn, zm, 8);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_lane_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svextq_lane_f32(svfloat32_t zn, svfloat32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f32,,)(zn, zm, 9);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_lane_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svextq_lane_f64(svfloat64_t zn, svfloat64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f64,,)(zn, zm, 10);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_lane_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svextq_lane_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _bf16,,)(zn, zm, 11);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
new file mode 100644
index 000000000000000..84f058ad8c16d35
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
@@ -0,0 +1,304 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z19test_svpmov_lane_u8u11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpmov_lane_u8(svuint8_t zn) {
+ return SVE_ACLE_FUNC(svpmov_lane, _u8)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z19test_svpmov_lane_s8u10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpmov_lane_s8(svint8_t zn) {
+ return SVE_ACLE_FUNC(svpmov_lane, _s8)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_u16u12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_u16(svuint16_t zn) {
+ return SVE_ACLE_FUNC(svpmov_lane, _u16)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_s16u11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_s16(svint16_t zn) {
+ return SVE_ACLE_FUNC(svpmov_lane, _s16)(zn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_u32u12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_u32(svuint32_t zn) {
+ return SVE_ACLE_FUNC(svpmov_lane, _u32)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_s32u11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_s32(svint32_t zn) {
+ return SVE_ACLE_FUNC(svpmov_lane, _s32)(zn, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_u64u12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_u64(svuint64_t zn) {
+ return SVE_ACLE_FUNC(svpmov_lane, _u64)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 7)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_s64u11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 7)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_s64(svint64_t zn) { // Lane 7 of zn; expected IR is pmov.to.pred.lane.nxv2i64 followed by convert.to.svbool.
+ return SVE_ACLE_FUNC(svpmov_lane, _s64)(zn, 7);
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z14test_svpmov_u8u11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpmov_u8(svuint8_t zn) { // No lane argument; expected IR is pmov.to.pred.lane.zero.nxv16i8, returned without a svbool conversion.
+ return SVE_ACLE_FUNC(svpmov, _u8)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z14test_svpmov_s8u10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpmov_s8(svint8_t zn) { // No lane argument; expected IR is pmov.to.pred.lane.zero.nxv16i8, returned without a svbool conversion.
+ return SVE_ACLE_FUNC(svpmov, _s8)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv8i16(<vscale x 8 x i16> [[ZN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_u16u12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv8i16(<vscale x 8 x i16> [[ZN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_u16(svuint16_t zn) { // No lane argument; expected IR is pmov.to.pred.lane.zero.nxv8i16 followed by convert.to.svbool.
+ return SVE_ACLE_FUNC(svpmov, _u16)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv8i16(<vscale x 8 x i16> [[ZN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_s16u11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv8i16(<vscale x 8 x i16> [[ZN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_s16(svint16_t zn) { // No lane argument; expected IR is pmov.to.pred.lane.zero.nxv8i16 followed by convert.to.svbool.
+ return SVE_ACLE_FUNC(svpmov, _s16)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv4i32(<vscale x 4 x i32> [[ZN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_u32u12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv4i32(<vscale x 4 x i32> [[ZN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_u32(svuint32_t zn) { // No lane argument; expected IR is pmov.to.pred.lane.zero.nxv4i32 followed by convert.to.svbool.
+ return SVE_ACLE_FUNC(svpmov, _u32)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv4i32(<vscale x 4 x i32> [[ZN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_s32u11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv4i32(<vscale x 4 x i32> [[ZN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_s32(svint32_t zn) { // No lane argument; expected IR is pmov.to.pred.lane.zero.nxv4i32 followed by convert.to.svbool.
+ return SVE_ACLE_FUNC(svpmov, _s32)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv2i64(<vscale x 2 x i64> [[ZN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_u64u12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv2i64(<vscale x 2 x i64> [[ZN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_u64(svuint64_t zn) { // No lane argument; expected IR is pmov.to.pred.lane.zero.nxv2i64 followed by convert.to.svbool.
+ return SVE_ACLE_FUNC(svpmov, _u64)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv2i64(<vscale x 2 x i64> [[ZN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_s64u11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv2i64(<vscale x 2 x i64> [[ZN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_s64(svint64_t zn) { // No lane argument; expected IR is pmov.to.pred.lane.zero.nxv2i64 followed by convert.to.svbool.
+ return SVE_ACLE_FUNC(svpmov, _s64)(zn);
+}
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
new file mode 100644
index 000000000000000..1e45f1ecedce55f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
@@ -0,0 +1,276 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// Builds an intrinsic name by token pasting; the overloaded spelling drops the second argument (A1##A3), the default keeps all three.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3) A1##A2##A3
+#endif
+
+// _m: merging forms (expected IR uses pmov.to.vector.lane.merging)
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_u16_m
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z22test_svpmov_lane_u16_mu12__SVUint16_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svpmov_lane_u16_m(svuint16_t zn, svbool_t pn) { // Expands to svpmov_lane_u16_m, or svpmov_lane_m when SVE_OVERLOADED_FORMS is defined; lane 1.
+ return SVE_ACLE_FUNC(svpmov_lane, _u16, _m)(zn, pn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_s16_m
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z22test_svpmov_lane_s16_mu11__SVInt16_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svpmov_lane_s16_m(svint16_t zn, svbool_t pn) { // Expands to svpmov_lane_s16_m, or svpmov_lane_m when SVE_OVERLOADED_FORMS is defined; lane 1.
+ return SVE_ACLE_FUNC(svpmov_lane, _s16, _m)(zn, pn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svpmov_lane_u32_m
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z22test_svpmov_lane_u32_mu12__SVUint32_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 1)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svpmov_lane_u32_m(svuint32_t zn, svbool_t pn) { // Expands to svpmov_lane_u32_m, or svpmov_lane_m when SVE_OVERLOADED_FORMS is defined; lane 1.
+ return SVE_ACLE_FUNC(svpmov_lane, _u32, _m)(zn, pn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svpmov_lane_s32_m
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 3)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z22test_svpmov_lane_s32_mu11__SVInt32_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 3)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svpmov_lane_s32_m(svint32_t zn, svbool_t pn) { // Expands to svpmov_lane_s32_m, or svpmov_lane_m when SVE_OVERLOADED_FORMS is defined; lane 3.
+ return SVE_ACLE_FUNC(svpmov_lane, _s32, _m)(zn, pn, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svpmov_lane_u64_m
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z22test_svpmov_lane_u64_mu12__SVUint64_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 1)
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svpmov_lane_u64_m(svuint64_t zn, svbool_t pn) { // Expands to svpmov_lane_u64_m, or svpmov_lane_m when SVE_OVERLOADED_FORMS is defined; lane 1.
+ return SVE_ACLE_FUNC(svpmov_lane, _u64, _m)(zn, pn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svpmov_lane_s64_m
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 7)
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z22test_svpmov_lane_s64_mu11__SVInt64_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 7)
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svpmov_lane_s64_m(svint64_t zn, svbool_t pn) { // Expands to svpmov_lane_s64_m, or svpmov_lane_m when SVE_OVERLOADED_FORMS is defined; lane 7.
+ return SVE_ACLE_FUNC(svpmov_lane, _s64, _m)(zn, pn, 7);
+}
+
+
+// _z: zeroing forms (expected IR uses pmov.to.vector.lane.zeroing)
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svpmov_lane_u8_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z21test_svpmov_lane_u8_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svpmov_lane_u8_z(svbool_t pn) { // Expands to svpmov_u8_z in both builds (second macro argument is empty); the call takes no lane argument despite the test name.
+ return SVE_ACLE_FUNC(svpmov_u8, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svpmov_lane_s8_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z21test_svpmov_lane_s8_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svpmov_lane_s8_z(svbool_t pn) { // Expands to svpmov_s8_z in both builds (second macro argument is empty); the call takes no lane argument despite the test name.
+ return SVE_ACLE_FUNC(svpmov_s8, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_u16_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z22test_svpmov_lane_u16_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svpmov_lane_u16_z(svbool_t pn) { // Expands to svpmov_u16_z in both builds (second macro argument is empty); the call takes no lane argument despite the test name.
+ return SVE_ACLE_FUNC(svpmov_u16, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_s16_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z22test_svpmov_lane_s16_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svpmov_lane_s16_z(svbool_t pn) { // Expands to svpmov_s16_z in both builds (second macro argument is empty); the call takes no lane argument despite the test name.
+ return SVE_ACLE_FUNC(svpmov_s16, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svpmov_lane_u32_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z22test_svpmov_lane_u32_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svpmov_lane_u32_z(svbool_t pn) { // Expands to svpmov_u32_z in both builds (second macro argument is empty); the call takes no lane argument despite the test name.
+ return SVE_ACLE_FUNC(svpmov_u32, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svpmov_lane_s32_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z22test_svpmov_lane_s32_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svpmov_lane_s32_z(svbool_t pn) { // Expands to svpmov_s32_z in both builds (second macro argument is empty); the call takes no lane argument despite the test name.
+ return SVE_ACLE_FUNC(svpmov_s32, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svpmov_lane_u64_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z22test_svpmov_lane_u64_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svpmov_lane_u64_z(svbool_t pn) { // Expands to svpmov_u64_z in both builds (second macro argument is empty); the call takes no lane argument despite the test name.
+ return SVE_ACLE_FUNC(svpmov_u64, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svpmov_lane_s64_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z22test_svpmov_lane_s64_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svpmov_lane_s64_z(svbool_t pn) { // Expands to svpmov_s64_z in both builds (second macro argument is empty); the call takes no lane argument despite the test name.
+ return SVE_ACLE_FUNC(svpmov_s64, , _z)(pn);
+}
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
new file mode 100644
index 000000000000000..56e95d1abace317
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
@@ -0,0 +1,214 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#if defined(SVE_OVERLOADED_FORMS)
+// Overloaded spelling: paste only the 1st and 3rd arguments; the 2nd and 4th are ignored.
+#define SVE_ACLE_FUNC(NAME, DROPPED_2, TAIL, DROPPED_4) NAME##TAIL
+#else
+#define SVE_ACLE_FUNC(NAME, PART_2, TAIL, PART_4) NAME##PART_2##TAIL##PART_4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtblq_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtblq_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svtblq_u8(svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _u8,,)(zn, zm); // expands to svtblq_u8, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svtblq_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtblq_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svtblq_u16(svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _u16,,)(zn, zm); // expands to svtblq_u16, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svtblq_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtblq_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svtblq_u32(svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _u32,,)(zn, zm); // expands to svtblq_u32, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svtblq_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtblq_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svtblq_u64(svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _u64,,)(zn, zm); // expands to svtblq_u64, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtblq_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtblq_s8u10__SVInt8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svtblq_s8(svint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _s8,,)(zn, zm); // expands to svtblq_s8, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svtblq_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtblq_s16u11__SVInt16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svtblq_s16(svint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _s16,,)(zn, zm); // expands to svtblq_s16, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svtblq_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtblq_s32u11__SVInt32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svtblq_s32(svint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _s32,,)(zn, zm); // expands to svtblq_s32, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svtblq_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtblq_s64u11__SVInt64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svtblq_s64(svint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _s64,,)(zn, zm); // expands to svtblq_s64, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svtblq_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svtblq_f16u13__SVFloat16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svtblq_f16(svfloat16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _f16,,)(zn, zm); // expands to svtblq_f16, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svtblq_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svtblq_f32u13__SVFloat32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svtblq_f32(svfloat32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _f32,,)(zn, zm); // expands to svtblq_f32, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svtblq_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svtblq_f64u13__SVFloat64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svtblq_f64(svfloat64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _f64,,)(zn, zm); // expands to svtblq_f64, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svtblq_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtblq_bf16u14__SVBFloat16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svtblq_bf16(svbfloat16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svtblq, _bf16,,)(zn, zm); // expands to svtblq_bf16, or to svtblq when SVE_OVERLOADED_FORMS is defined
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
new file mode 100644
index 000000000000000..eeb589137d4f6d6
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
@@ -0,0 +1,214 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#if defined(SVE_OVERLOADED_FORMS)
+// Overloaded spelling: paste only the 1st and 3rd arguments; the 2nd and 4th are ignored.
+#define SVE_ACLE_FUNC(NAME, DROPPED_2, TAIL, DROPPED_4) NAME##TAIL
+#else
+#define SVE_ACLE_FUNC(NAME, PART_2, TAIL, PART_4) NAME##PART_2##TAIL##PART_4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtbxq_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_u8u11__SVUint8_tu11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svtbxq_u8(svuint8_t passthru, svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _u8,,)(passthru, zn, zm); // expands to svtbxq_u8, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svtbxq_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_u16u12__SVUint16_tu12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svtbxq_u16(svuint16_t passthru, svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _u16,,)(passthru, zn, zm); // expands to svtbxq_u16, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svtbxq_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_u32u12__SVUint32_tu12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svtbxq_u32(svuint32_t passthru, svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _u32,,)(passthru, zn, zm); // expands to svtbxq_u32, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svtbxq_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_u64u12__SVUint64_tu12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svtbxq_u64(svuint64_t passthru, svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _u64,,)(passthru, zn, zm); // expands to svtbxq_u64, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtbxq_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_s8u10__SVInt8_tu10__SVInt8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svtbxq_s8(svint8_t passthru, svint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _s8,,)(passthru, zn, zm); // expands to svtbxq_s8, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svtbxq_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_s16u11__SVInt16_tu11__SVInt16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svtbxq_s16(svint16_t passthru, svint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _s16,,)(passthru, zn, zm); // expands to svtbxq_s16, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svtbxq_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_s32u11__SVInt32_tu11__SVInt32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svtbxq_s32(svint32_t passthru, svint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _s32,,)(passthru, zn, zm); // expands to svtbxq_s32, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svtbxq_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_s64u11__SVInt64_tu11__SVInt64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svtbxq_s64(svint64_t passthru, svint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _s64,,)(passthru, zn, zm); // expands to svtbxq_s64, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svtbxq_f16
+// CHECK-SAME: (<vscale x 8 x half> [[PASSTHRU:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svtbxq_f16u13__SVFloat16_tu13__SVFloat16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[PASSTHRU:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svtbxq_f16(svfloat16_t passthru, svfloat16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _f16,,)(passthru, zn, zm); // expands to svtbxq_f16, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svtbxq_f32
+// CHECK-SAME: (<vscale x 4 x float> [[PASSTHRU:%.*]], <vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svtbxq_f32u13__SVFloat32_tu13__SVFloat32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[PASSTHRU:%.*]], <vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svtbxq_f32(svfloat32_t passthru, svfloat32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _f32,,)(passthru, zn, zm); // expands to svtbxq_f32, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svtbxq_f64
+// CHECK-SAME: (<vscale x 2 x double> [[PASSTHRU:%.*]], <vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svtbxq_f64u13__SVFloat64_tu13__SVFloat64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[PASSTHRU:%.*]], <vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svtbxq_f64(svfloat64_t passthru, svfloat64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _f64,,)(passthru, zn, zm); // expands to svtbxq_f64, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svtbxq_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[PASSTHRU:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtbxq_bf16u14__SVBFloat16_tu14__SVBFloat16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[PASSTHRU:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svtbxq_bf16(svbfloat16_t passthru, svbfloat16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svtbxq, _bf16,,)(passthru, zn, zm); // expands to svtbxq_bf16, or to svtbxq when SVE_OVERLOADED_FORMS is defined
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
new file mode 100644
index 000000000000000..0773f8d8d01966a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -0,0 +1,217 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq1_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_u8,,)(zn, zm); // explicit svuzpq1_u8, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq1_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_u16,,)(zn, zm); // explicit svuzpq1_u16, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq1_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_u32,,)(zn, zm); // explicit svuzpq1_u32, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq1_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_u64,,)(zn, zm); // explicit svuzpq1_u64, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq1_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_s8,,)(zn, zm); // explicit svuzpq1_s8, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq1_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_s16,,)(zn, zm); // explicit svuzpq1_s16, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq1_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_s32,,)(zn, zm); // explicit svuzpq1_s32, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq1_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_s64,,)(zn, zm); // explicit svuzpq1_s64, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svuzpq1_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq1_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_f16,,)(zn, zm); // explicit svuzpq1_f16, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svuzpq1_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq1_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_f32,,)(zn, zm); // explicit svuzpq1_f32, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svuzpq1_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq1_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_f64,,)(zn, zm); // explicit svuzpq1_f64, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svuzpq1_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq1_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svuzpq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+ return SVE_ACLE_FUNC(svuzpq1,_bf16,,)(zn, zm); // explicit svuzpq1_bf16, or overloaded svuzpq1 under SVE_OVERLOADED_FORMS
+}
+
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
new file mode 100644
index 000000000000000..9883a7ef21196a3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -0,0 +1,216 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_u8,,)(zn, zm); // explicit svuzpq2_u8, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq2_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_u16,,)(zn, zm); // explicit svuzpq2_u16, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq2_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_u32,,)(zn, zm); // explicit svuzpq2_u32, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq2_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_u64,,)(zn, zm); // explicit svuzpq2_u64, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_s8,,)(zn, zm); // explicit svuzpq2_s8, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq2_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_s16,,)(zn, zm); // explicit svuzpq2_s16, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq2_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_s32,,)(zn, zm); // explicit svuzpq2_s32, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq2_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_s64,,)(zn, zm); // explicit svuzpq2_s64, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svuzpq2_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq2_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_f16,,)(zn, zm); // explicit svuzpq2_f16, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svuzpq2_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq2_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_f32,,)(zn, zm); // explicit svuzpq2_f32, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svuzpq2_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq2_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_f64,,)(zn, zm); // explicit svuzpq2_f64, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svuzpq2_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq2_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svuzpq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+ return SVE_ACLE_FUNC(svuzpq2,_bf16,,)(zn, zm); // explicit svuzpq2_bf16, or overloaded svuzpq2 under SVE_OVERLOADED_FORMS
+}
+
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
new file mode 100644
index 000000000000000..c7a1a9b2c227f7a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -0,0 +1,217 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq1_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svzipq1,_u8,,)(zn, zm); // explicit svzipq1_u8, or overloaded svzipq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq1_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svzipq1,_u16,,)(zn, zm); // explicit svzipq1_u16, or overloaded svzipq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq1_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svzipq1,_u32,,)(zn, zm); // explicit svzipq1_u32, or overloaded svzipq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq1_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svzipq1,_u64,,)(zn, zm); // explicit svzipq1_u64, or overloaded svzipq1 under SVE_OVERLOADED_FORMS
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq1_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
+ return SVE_ACLE_FUNC(svzipq1,_s8,,)(zn, zm); // explicit svzipq1_s8, or overloaded svzipq1 under SVE_OVERLOADED_FORMS
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq1_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
+ return svzipq1_s16(zn, zm); // NOTE(review): hard-codes the _s16 name; the overloaded svzipq1 spelling is never exercised here - confirm intended
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq1_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
+ return svzipq1_s32(zn, zm); // NOTE(review): hard-codes the _s32 name; the overloaded svzipq1 spelling is never exercised here - confirm intended
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq1_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
+ return svzipq1_s64(zn, zm); // NOTE(review): hard-codes the _s64 name; the overloaded svzipq1 spelling is never exercised here - confirm intended
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svzipq1_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq1_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
+ return svzipq1_f16(zn, zm); // NOTE(review): hard-codes the _f16 name; the overloaded svzipq1 spelling is never exercised here - confirm intended
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svzipq1_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq1_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
+ return svzipq1_f32(zn, zm); // NOTE(review): hard-codes the _f32 name; the overloaded svzipq1 spelling is never exercised here - confirm intended
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svzipq1_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq1_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
+ return svzipq1_f64(zn, zm); // NOTE(review): hard-codes the _f64 name; the overloaded svzipq1 spelling is never exercised here - confirm intended
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svzipq1_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq1_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svzipq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+ return svzipq1_bf16(zn, zm); // NOTE(review): hard-codes the _bf16 name; the overloaded svzipq1 spelling is never exercised here - confirm intended
+}
+
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
new file mode 100644
index 000000000000000..220352ece1984b6
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -0,0 +1,217 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// Overloaded forms: paste only A1 and A3; the A2 and A4 arguments are dropped. Otherwise all four pieces are pasted.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq2_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_u8,,)(zn, zm); // svzipq2_u8, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq2_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_u16,,)(zn, zm); // svzipq2_u16, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq2_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_u32,,)(zn, zm); // svzipq2_u32, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq2_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_u64,,)(zn, zm); // svzipq2_u64, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq2_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_s8,,)(zn, zm); // svzipq2_s8, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq2_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_s16,,)(zn, zm); // svzipq2_s16, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq2_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_s32,,)(zn, zm); // svzipq2_s32, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq2_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_s64,,)(zn, zm); // svzipq2_s64, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svzipq2_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq2_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_f16,,)(zn, zm); // svzipq2_f16, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svzipq2_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq2_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_f32,,)(zn, zm); // svzipq2_f32, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svzipq2_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq2_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_f64,,)(zn, zm); // svzipq2_f64, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svzipq2_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq2_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svzipq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+ return SVE_ACLE_FUNC(svzipq2,_bf16,,)(zn, zm); // svzipq2_bf16, or overloaded svzipq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+
diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index c4e087c8b7d79ea..30d60cfe205e7e0 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -116,3 +116,9 @@ void test_svdot_lane_2way(svint32_t s32, svuint32_t u32, svint16_t s16, svuint16
svdot_lane_u32_u16_u16(u32, u16, u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svdot_lane_f32_f16_f16(f32, f16, f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}
+
+__attribute__((target("+sve2p1"))) // probes the lane immediate just below (-1) and just above (16) the accepted range
+void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, svfloat16_t zm_f16){
+ svextq_lane_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+ svextq_lane_f16(zn_f16, zm_f16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a42e2c49cb477ba..7d126d13b5a6460 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3455,3 +3455,44 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic;
}
+
+// SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
+// Two-vector-operand intrinsics; the signature comes from AdvSIMD_2VectorArg_Intrinsic.
+def int_aarch64_sve_zipq1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_zipq2 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzpq1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzpq2 : AdvSIMD_2VectorArg_Intrinsic;
+
+// SVE2.1 - Programmable table lookup within each quadword vector segment
+// (zeroing)/(merging)
+// tblq reuses the SVE TBL intrinsic class; tbxq reuses the SVE2 TBX intrinsic class.
+def int_aarch64_sve_tblq : AdvSIMD_SVE_TBL_Intrinsic;
+def int_aarch64_sve_tbxq : AdvSIMD_SVE2_TBX_Intrinsic;
+
+// SVE2.1 - Extract vector segment from each pair of quadword segments.
+// Two vectors plus an index operand; the signature comes from AdvSIMD_2VectorArgIndexed_Intrinsic.
+def int_aarch64_sve_extq_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+
+//
+// SVE2.1 - Move predicate to/from vector
+// Vector -> predicate: i1 result as wide as the vector operand; operand 1 is an immediate i32.
+def int_aarch64_sve_pmov_to_pred_lane :
+ DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [llvm_anyvector_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+// Vector -> predicate with no lane operand.
+def int_aarch64_sve_pmov_to_pred_lane_zero :
+ DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+// Predicate -> vector: operand 0 has the result type, operand 2 is an immediate i32.
+def int_aarch64_sve_pmov_to_vector_lane_merging :
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+// Predicate -> vector with only the predicate operand (no vector input, no lane operand).
+def int_aarch64_sve_pmov_to_vector_lane_zeroing :
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrNoMem]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f88f5a240a1fd7f..68e87f491a09e45 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -818,8 +818,11 @@ def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
def Imm0_0Operand : AsmImmRange<0, 0>;
def Imm0_1Operand : AsmImmRange<0, 1>;
+def Imm1_1Operand : AsmImmRange<1, 1>;
def Imm0_3Operand : AsmImmRange<0, 3>;
+def Imm1_3Operand : AsmImmRange<1, 3>;
def Imm0_7Operand : AsmImmRange<0, 7>;
+def Imm1_7Operand : AsmImmRange<1, 7>;
def Imm0_15Operand : AsmImmRange<0, 15>;
def Imm0_31Operand : AsmImmRange<0, 31>;
def Imm0_63Operand : AsmImmRange<0, 63>;
@@ -1035,6 +1038,13 @@ def timm0_1 : Operand<i64>, TImmLeaf<i64, [{
let ParserMatchClass = Imm0_1Operand;
}
+// timm32_0_0 predicate - True if the 32-bit immediate is exactly 0 (range [0,0])
+def timm32_0_0 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) == 0;
+}]> {
+ let ParserMatchClass = Imm0_0Operand;
+}
+
// timm32_0_1 predicate - True if the 32-bit immediate is in the range [0,1]
def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 2;
@@ -1042,6 +1052,20 @@ def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_1Operand;
}
+// timm32_1_1 predicate - True if the 32-bit immediate is exactly 1 (range [1,1])
+def timm32_1_1 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) == 1;
+}]> {
+ let ParserMatchClass = Imm1_1Operand;
+}
+
+// timm32_1_3 predicate - accepts a 32-bit immediate of 1, 2 or 3 (range [1,3])
+def timm32_1_3 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) >= 1 && ((uint32_t)Imm) <= 3;
+}]> {
+ let ParserMatchClass = Imm1_3Operand;
+}
+
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 16;
@@ -1077,6 +1101,13 @@ def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_7Operand;
}
+// timm32_1_7 predicate - accepts a 32-bit immediate from 1 through 7 (range [1,7])
+def timm32_1_7 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) >= 1 && ((uint32_t)Imm) <= 7;
+}]> {
+ let ParserMatchClass = Imm1_7Operand;
+}
+
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 16;
@@ -1430,6 +1461,8 @@ let OperandNamespace = "AArch64" in {
let OperandType = "OPERAND_IMPLICIT_IMM_0" in {
defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
[{ return ((uint64_t)Imm) == 0; }]>;
+ defm VectorIndex032b : VectorIndex<i32, VectorIndex0Operand, // i32-typed counterpart of VectorIndex0; same parser operand class
+ [{ return ((uint32_t)Imm) == 0; }]>;
}
}
defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a53973bad92e25f..18a1deb5955c3f0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3994,10 +3994,10 @@ defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv">;
defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv">;
defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
-def EXTQ_ZZI : sve2p1_extq<"extq">;
+defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>;
-defm PMOV_PZI : sve2p1_vector_to_pred<"pmov">;
-defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov">;
+defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
+defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>;
defm ORQV_VPZ : sve2p1_int_reduce_q<0b1100, "orqv">;
defm EORQV_VPZ : sve2p1_int_reduce_q<0b1101, "eorqv">;
@@ -4008,12 +4008,12 @@ defm UMAXQV_VPZ : sve2p1_int_reduce_q<0b0101, "umaxqv">;
defm SMINQV_VPZ : sve2p1_int_reduce_q<0b0110, "sminqv">;
defm UMINQV_VPZ : sve2p1_int_reduce_q<0b0111, "uminqv">;
-defm TBXQ_ZZZ : sve2_int_perm_tbx<"tbxq", 0b10, null_frag>;
-defm ZIPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b000, "zipq1">;
-defm ZIPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b001, "zipq2">;
-defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
-defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
-defm TBLQ_ZZZ : sve2p1_tblq<"tblq">;
+defm ZIPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b000, "zipq1", int_aarch64_sve_zipq1>;
+defm ZIPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b001, "zipq2", int_aarch64_sve_zipq2>;
+defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1", int_aarch64_sve_uzpq1>;
+defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2", int_aarch64_sve_uzpq2>;
+defm TBXQ_ZZZ : sve2_int_perm_tbx<"tbxq", 0b10, int_aarch64_sve_tbxq>;
+defm TBLQ_ZZZ : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>;
} // End HasSVE2p1_or_HasSME2p1
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index d54be1e406fed95..8dbfe098c7b5d84 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -9913,7 +9913,7 @@ multiclass sve2p1_dupq<string mnemonic> {
// SVE Permute Vector - Quadwords (EXTQ)
class sve2p1_extq<string mnemonic>
- : I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, imm0_15:$imm4),
+ : I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, timm32_0_15:$imm4),
mnemonic, "\t$Zdn, $_Zdn, $Zm, $imm4",
"", []>, Sched<[]> {
bits<5> Zdn;
@@ -9931,6 +9931,19 @@ class sve2p1_extq<string mnemonic>
let hasSideEffects = 0;
}
+// EXTQ: defines the instruction as NAME and selects Op to it for every SVE element type.
+multiclass sve2p1_extq<string mnemonic, SDPatternOperator Op> {
+ def NAME : sve2p1_extq<mnemonic>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, Op, nxv16i8, nxv16i8, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, Op, nxv8i16, nxv8i16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, Op, nxv4i32, nxv4i32, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, Op, nxv2i64, nxv2i64, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ // Floating-point and bfloat vectors select the same single instruction.
+ def : SVE_3_Op_Imm_Pat<nxv8f16, Op, nxv8f16, nxv8f16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv4f32, Op, nxv4f32, nxv4f32, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv2f64, Op, nxv2f64, nxv2f64, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Imm_Pat<nxv8bf16, Op, nxv8bf16, nxv8bf16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+}
// SVE move predicate from vector
class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
@@ -9952,8 +9965,8 @@ class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
let hasSideEffects = 0;
}
-multiclass sve2p1_vector_to_pred<string mnemonic> {
- def _B : sve2p1_vector_to_pred<{0, 0, 0, 1}, mnemonic, PPR8, VectorIndex0>;
+multiclass sve2p1_vector_to_pred<string mnemonic, SDPatternOperator Op_lane, SDPatternOperator Op> {
+ def _B : sve2p1_vector_to_pred<{0, 0, 0, 1}, mnemonic, PPR8, VectorIndex032b>;
def _H : sve2p1_vector_to_pred<{0, 0, 1, ?}, mnemonic, PPR16, VectorIndexD32b> {
bits<1> index;
let Inst{17} = index;
@@ -9970,6 +9983,25 @@ multiclass sve2p1_vector_to_pred<string mnemonic> {
def : InstAlias<mnemonic # "\t$Pd, $Zn",
(!cast<Instruction>(NAME # _B) PPR8:$Pd, ZPRAny:$Zn, 0), 1>;
+
+ // Op_lane: the immediate lane index is forwarded to the instruction; its accepted range grows from [0,0] for _B to [0,7] for _D.
+ def : Pat<(nxv16i1 (Op_lane (nxv16i8 ZPRAny:$Zn), (i32 timm32_0_0:$Idx))),
+ (!cast<Instruction>(NAME # _B) ZPRAny:$Zn, timm32_0_0:$Idx)>;
+ def : Pat<(nxv8i1 (Op_lane (nxv8i16 ZPRAny:$Zn), (i32 timm32_0_1:$Idx))),
+ (!cast<Instruction>(NAME # _H) ZPRAny:$Zn, timm32_0_1:$Idx)>;
+ def : Pat<(nxv4i1 (Op_lane (nxv4i32 ZPRAny:$Zn), (i32 timm32_0_3:$Idx))),
+ (!cast<Instruction>(NAME # _S) ZPRAny:$Zn, timm32_0_3:$Idx)>;
+ def : Pat<(nxv2i1 (Op_lane (nxv2i64 ZPRAny:$Zn), (i32 timm32_0_7:$Idx))),
+ (!cast<Instruction>(NAME # _D) ZPRAny:$Zn, timm32_0_7:$Idx)>;
+ // Op: no lane operand on the node; the instruction index is hard-wired to 0.
+ def : Pat<(nxv16i1 (Op (nxv16i8 ZPRAny:$Zn))),
+ (!cast<Instruction>(NAME # _B) ZPRAny:$Zn, 0)>;
+ def : Pat<(nxv8i1 (Op (nxv8i16 ZPRAny:$Zn))),
+ (!cast<Instruction>(NAME # _H) ZPRAny:$Zn, 0)>;
+ def : Pat<(nxv4i1 (Op (nxv4i32 ZPRAny:$Zn))),
+ (!cast<Instruction>(NAME # _S) ZPRAny:$Zn, 0)>;
+ def : Pat<(nxv2i1 (Op (nxv2i64 ZPRAny:$Zn))),
+ (!cast<Instruction>(NAME # _D) ZPRAny:$Zn, 0)>;
}
@@ -9993,7 +10025,8 @@ class sve2p1_pred_to_vector<bits<4> opc, string mnemonic,
let hasSideEffects = 0;
}
-multiclass sve2p1_pred_to_vector<string mnemonic> {
+multiclass sve2p1_pred_to_vector<string mnemonic, SDPatternOperator MergeOp,
+ SDPatternOperator ZeroOp> {
def _B : sve2p1_pred_to_vector<{0, 0, 0, 1}, mnemonic, PPR8, VectorIndex0>;
def _H : sve2p1_pred_to_vector<{0, 0, 1, ?}, mnemonic, PPR16, VectorIndexD32b> {
bits<1> index;
@@ -10011,6 +10044,24 @@ multiclass sve2p1_pred_to_vector<string mnemonic> {
def : InstAlias<mnemonic # "\t$Zd, $Pn",
(!cast<Instruction>(NAME # _B) ZPRAny:$Zd, 0, PPR8:$Pn), 1>;
+
+ // MergeOp: $Zd and a lane index starting at 1 are forwarded to the instruction; there is no _B pattern here.
+ def : Pat<(nxv8i16 (MergeOp (nxv8i16 ZPRAny:$Zd), (nxv8i1 PPR16:$Pn), (i32 timm32_1_1:$Idx))),
+ (!cast<Instruction>(NAME # _H) ZPRAny:$Zd, timm32_1_1:$Idx, PPR16:$Pn)>;
+ def : Pat<(nxv4i32 (MergeOp (nxv4i32 ZPRAny:$Zd), (nxv4i1 PPR32:$Pn), (i32 timm32_1_3:$Idx))),
+ (!cast<Instruction>(NAME # _S) ZPRAny:$Zd, timm32_1_3:$Idx, PPR32:$Pn)>;
+ def : Pat<(nxv2i64 (MergeOp (nxv2i64 ZPRAny:$Zd), (nxv2i1 PPR64:$Pn), (i32 timm32_1_7:$Idx))),
+ (!cast<Instruction>(NAME # _D) ZPRAny:$Zd, timm32_1_7:$Idx, PPR64:$Pn)>;
+
+ // ZeroOp: lane index fixed at 0, with IMPLICIT_DEF standing in for the vector input operand.
+ def : Pat<(nxv16i8 (ZeroOp (nxv16i1 PPR8:$Pn))),
+ (!cast<Instruction>(NAME # _B) (IMPLICIT_DEF), 0, PPR8:$Pn)>;
+ def : Pat<(nxv8i16 (ZeroOp (nxv8i1 PPR16:$Pn))),
+ (!cast<Instruction>(NAME # _H) (IMPLICIT_DEF), 0, PPR16:$Pn)>;
+ def : Pat<(nxv4i32 (ZeroOp (nxv4i1 PPR32:$Pn))),
+ (!cast<Instruction>(NAME # _S) (IMPLICIT_DEF), 0, PPR32:$Pn)>;
+ def : Pat<(nxv2i64 (ZeroOp (nxv2i1 PPR64:$Pn))),
+ (!cast<Instruction>(NAME # _D) (IMPLICIT_DEF), 0, PPR64:$Pn)>;
}
@@ -10066,18 +10117,41 @@ class sve2p1_permute_vec_elems_q<bits<2> sz, bits<3> opc, string mnemonic,
let hasSideEffects = 0;
}
-multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic> {
+multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic,
+ SDPatternOperator op> {
def _B : sve2p1_permute_vec_elems_q<0b00, opc, mnemonic, ZPR8, ZPR8>;
def _H : sve2p1_permute_vec_elems_q<0b01, opc, mnemonic, ZPR16, ZPR16>;
def _S : sve2p1_permute_vec_elems_q<0b10, opc, mnemonic, ZPR32, ZPR32>;
def _D : sve2p1_permute_vec_elems_q<0b11, opc, mnemonic, ZPR64, ZPR64>;
+ // Integer element types: `op` on two same-typed vectors selects the matching _B/_H/_S/_D instruction.
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ // Floating-point element types reuse the _H/_S/_D instructions of the same element width.
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ // bfloat16 vectors reuse the _H instruction.
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
-multiclass sve2p1_tblq<string mnemonic> {
+multiclass sve2p1_tblq<string mnemonic, SDPatternOperator op> {
def _B : sve2p1_permute_vec_elems_q<0b00, 0b110, mnemonic, ZPR8, Z_b>;
def _H : sve2p1_permute_vec_elems_q<0b01, 0b110, mnemonic, ZPR16, Z_h>;
def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
+ // Integer data: both pattern operand types equal the result type.
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ // Floating-point data: the second operand type is the integer vector of the same element width (presumably the lookup indices - confirm).
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ // bfloat16 data with an nxv8i16 second operand, selecting the _H instruction.
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
new file mode 100644
index 000000000000000..efe19432f9c32e9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_extq_i8 (<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_extq_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: extq z0.b, z0.b, z1.b, #0
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 0)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_extq_i16 (<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_extq_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: extq z0.b, z0.b, z1.b, #1
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 1)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_extq_i32 (<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_extq_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: extq z0.b, z0.b, z1.b, #2
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm, i32 2)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_extq_i64 (<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_extq_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: extq z0.b, z0.b, z1.b, #3
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm, i32 3)
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_extq_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_extq_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: extq z0.b, z0.b, z1.b, #4
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm, i32 4)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_extq_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_extq_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: extq z0.b, z0.b, z1.b, #5
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm, i32 5)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_extq_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_extq_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: extq z0.b, z0.b, z1.b, #6
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm, i32 6)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_extq_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_extq_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: extq z0.b, z0.b, z1.b, #15
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 15)
+ ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
new file mode 100644
index 000000000000000..7cae1d2c216b616
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+; NOTE(review): the FileCheck assertions in this file expect a call (bl) to the intrinsic's symbol, not a PMOV instruction; presumably the intrinsic was not being selected when they were generated - confirm and regenerate.
+define <vscale x 16 x i1> @test_pmov_to_pred_i8(<vscale x 16 x i8> %zn) {
+; CHECK-LABEL: test_pmov_to_pred_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res = call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> %zn, i32 0)
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @test_pmov_to_pred_i16(<vscale x 8 x i16> %zn) {
+; CHECK-LABEL: test_pmov_to_pred_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: mov z8.d, z0.d
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16
+; CHECK-NEXT: mov z0.d, z8.d
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: mov p4.b, p0.b
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res1 = call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> %zn, i32 0)
+ %res2 = call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> %zn, i32 1)
+
+ %res = add <vscale x 8 x i1> %res1, %res2
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @test_pmov_to_pred_i32(<vscale x 4 x i32> %zn) {
+; CHECK-LABEL: test_pmov_to_pred_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: mov z8.d, z0.d
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32
+; CHECK-NEXT: mov z0.d, z8.d
+; CHECK-NEXT: mov w0, #3 // =0x3
+; CHECK-NEXT: mov p4.b, p0.b
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res1 = call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> %zn, i32 0)
+ %res2 = call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> %zn, i32 3)
+
+ %res = add <vscale x 4 x i1> %res1, %res2
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @test_pmov_to_pred_i64(<vscale x 2 x i64> %zn) {
+; CHECK-LABEL: test_pmov_to_pred_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: mov z8.d, z0.d
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64
+; CHECK-NEXT: mov z0.d, z8.d
+; CHECK-NEXT: mov w0, #7 // =0x7
+; CHECK-NEXT: mov p4.b, p0.b
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res1 = call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> %zn, i32 0)
+ %res2 = call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> %zn, i32 7)
+
+ %res = add <vscale x 2 x i1> %res1, %res2
+ ret <vscale x 2 x i1> %res
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8>, i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64>, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll
new file mode 100644
index 000000000000000..58b240b0fbd6806
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+
+; Merge
+; NOTE(review): the FileCheck assertions in this file expect a call (bl) to the intrinsic's symbol, not a PMOV instruction; presumably the intrinsic was not being selected when they were generated - confirm and regenerate.
+define <vscale x 8 x i16> @test_pmov_to_vector_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i1> %pn, i32 1)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_pmov_to_vector_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w0, #3 // =0x3
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i1> %pn, i32 3)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_pmov_to_vector_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w0, #7 // =0x7
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i1> %pn, i32 7)
+ ret <vscale x 2 x i64> %res
+}
+
+
+; Zero
+
+define <vscale x 16 x i8> @test_pmov_to_vector_zero_i8(<vscale x 16 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_zero_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> %pn)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_pmov_to_vector_zero_i16(<vscale x 8 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_zero_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> %pn)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_pmov_to_vector_zero_i32(<vscale x 4 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_zero_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> %pn)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_pmov_to_vector_zero_i64(<vscale x 2 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_zero_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ entry:
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> %pn)
+ ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll
new file mode 100644
index 000000000000000..cc9bbcfe47102b2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_tblq_i8 (<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_tblq_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tblq z0.b, { z0.b }, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_tblq_i16 (<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tblq_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tblq z0.h, { z0.h }, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_tblq_i32 (<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_tblq_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tblq z0.s, { z0.s }, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_tblq_i64 (<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_tblq_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tblq z0.d, { z0.d }, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_tblq_f16(<vscale x 8 x half> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tblq_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tblq z0.h, { z0.h }, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_tblq_f32(<vscale x 4 x float> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_tblq_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tblq z0.s, { z0.s }, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x i32> %zm)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_tblq_f64(<vscale x 2 x double> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_tblq_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tblq z0.d, { z0.d }, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x i64> %zm)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_tblq_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tblq_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tblq z0.h, { z0.h }, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll
new file mode 100644
index 000000000000000..29265b5d02e5188
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_tbxq_i8 (<vscale x 16 x i8> %passthru, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_tbxq_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tbxq z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> %passthru, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_tbxq_i16 (<vscale x 8 x i16> %passthru, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tbxq_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tbxq z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> %passthru, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_tbxq_i32 (<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_tbxq_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tbxq z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_tbxq_i64 (<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_tbxq_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tbxq z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_tbxq_f16(<vscale x 8 x half> %passthru, <vscale x 8 x half> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tbxq_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tbxq z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> %passthru, <vscale x 8 x half> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_tbxq_f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_tbxq_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tbxq z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %zn, <vscale x 4 x i32> %zm)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_tbxq_f64(<vscale x 2 x double> %passthru, <vscale x 2 x double> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_tbxq_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tbxq z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> %passthru, <vscale x 2 x double> %zn, <vscale x 2 x i64> %zm)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_tbxq_bf16(<vscale x 8 x bfloat> %passthru, <vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tbxq_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: tbxq z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> %passthru, <vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i64>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i16>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll
new file mode 100644
index 000000000000000..7a83b1500846285
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_uzpq1_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_uzpq1_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq1 z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_uzpq1_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_uzpq1_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_uzpq1_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_uzpq1_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_uzpq1_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_uzpq1_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq1 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_uzpq1_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_uzpq1_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_uzpq1_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_uzpq1_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_uzpq1_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_uzpq1_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq1 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_uzpq1_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_uzpq1_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+ ret <vscale x 8 x bfloat> %res
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll
new file mode 100644
index 000000000000000..8695f1c3e1ce959
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_uzpq2_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_uzpq2_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq2 z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_uzpq2_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_uzpq2_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq2 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_uzpq2_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_uzpq2_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq2 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_uzpq2_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_uzpq2_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq2 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_uzpq2_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_uzpq2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq2 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_uzpq2_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_uzpq2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq2 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_uzpq2_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_uzpq2_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq2 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_uzpq2_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_uzpq2_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzpq2 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+ ret <vscale x 8 x bfloat> %res
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll
new file mode 100644
index 000000000000000..89383c1a3f230e8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_zipq1_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_zipq1_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq1 z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_zipq1_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_zipq1_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_zipq1_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_zipq1_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_zipq1_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_zipq1_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq1 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_zipq1_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_zipq1_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_zipq1_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_zipq1_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_zipq1_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_zipq1_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq1 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_zipq1_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_zipq1_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+ ret <vscale x 8 x bfloat> %res
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll
new file mode 100644
index 000000000000000..c9aaae3371e80ef
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_zipq2_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_zipq2_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq2 z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_zipq2_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_zipq2_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq2 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_zipq2_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_zipq2_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq2 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_zipq2_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_zipq2_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq2 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_zipq2_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_zipq2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq2 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_zipq2_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_zipq2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq2 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_zipq2_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_zipq2_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq2 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_zipq2_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_zipq2_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zipq2 z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+ ret <vscale x 8 x bfloat> %res
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
>From aa4911dbadc8e80f45df267f88eb17e4a61356ba Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Fri, 10 Nov 2023 12:45:00 +0000
Subject: [PATCH 2/4] Update the clang tests
---
.../acle_sve2p1_extq.c | 34 +++++++++----------
.../acle_sve2p1_tblq.c | 20 +++++------
.../acle_sve2p1_tbxq.c | 34 +++++++++----------
.../acle_sve2p1_uzpq1.c | 34 +++++++++----------
.../acle_sve2p1_uzpq2.c | 34 +++++++++----------
.../acle_sve2p1_zipq1.c | 34 +++++++++----------
.../acle_sve2p1_zipq2.c | 34 +++++++++----------
7 files changed, 112 insertions(+), 112 deletions(-)
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
index c49f8c838ace373..7704db5667a2a7f 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -1,14 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
@@ -26,7 +26,7 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
@@ -42,7 +42,7 @@ svuint8_t test_svextq_lane_u8(svuint8_t zn, svuint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
@@ -58,7 +58,7 @@ svint8_t test_svextq_lane_s8(svint8_t zn, svint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
@@ -74,7 +74,7 @@ svuint16_t test_svextq_lane_u16(svuint16_t zn, svuint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
@@ -90,7 +90,7 @@ svint16_t test_svextq_lane_s16(svint16_t zn, svint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
@@ -106,7 +106,7 @@ svuint32_t test_svextq_lane_u32(svuint32_t zn, svuint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
@@ -122,7 +122,7 @@ svint32_t test_svextq_lane_s32(svint32_t zn, svint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
@@ -138,7 +138,7 @@ svuint64_t test_svextq_lane_u64(svuint64_t zn, svuint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
@@ -154,7 +154,7 @@ svint64_t test_svextq_lane_s64(svint64_t zn, svint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
@@ -170,7 +170,7 @@ svfloat16_t test_svextq_lane_f16(svfloat16_t zn, svfloat16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
@@ -186,7 +186,7 @@ svfloat32_t test_svextq_lane_f32(svfloat32_t zn, svfloat32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
@@ -202,7 +202,7 @@ svfloat64_t test_svextq_lane_f64(svfloat64_t zn, svfloat64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBfloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
index 56e95d1abace317..6c04413c238a62b 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
@@ -1,14 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
@@ -27,7 +27,7 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtblq_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtblq_u8u11__SVUint8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -43,7 +43,7 @@ svuint8_t test_svtblq_u8(svuint8_t zn, svuint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtblq_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtblq_u16u12__SVUint16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -59,7 +59,7 @@ svuint16_t test_svtblq_u16(svuint16_t zn, svuint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtblq_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtblq_u32u12__SVUint32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -75,7 +75,7 @@ svuint32_t test_svtblq_u32(svuint32_t zn, svuint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtblq_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtblq_u64u12__SVUint64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -203,7 +203,7 @@ svfloat64_t test_svtblq_f64(svfloat64_t zn, svuint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtblq_bf16u14__SVBFloat16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtblq_bf16u14__SVBfloat16_tu12__SVUint16_t
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
index eeb589137d4f6d6..0ad7107b676709d 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
@@ -1,14 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
@@ -27,7 +27,7 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_u8u11__SVUint8_tu11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_u8u11__SVUint8_tS_S_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -43,7 +43,7 @@ svuint8_t test_svtbxq_u8(svuint8_t passthru, svuint8_t zn, svuint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_u16u12__SVUint16_tu12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_u16u12__SVUint16_tS_S_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -59,7 +59,7 @@ svuint16_t test_svtbxq_u16(svuint16_t passthru, svuint16_t zn, svuint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_u32u12__SVUint32_tu12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_u32u12__SVUint32_tS_S_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -75,7 +75,7 @@ svuint32_t test_svtbxq_u32(svuint32_t passthru, svuint32_t zn, svuint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_u64u12__SVUint64_tu12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_u64u12__SVUint64_tS_S_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -91,7 +91,7 @@ svuint64_t test_svtbxq_u64(svuint64_t passthru, svuint64_t zn, svuint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_s8u10__SVInt8_tu10__SVInt8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_s8u10__SVInt8_tS_u11__SVUint8_t
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -107,7 +107,7 @@ svint8_t test_svtbxq_s8(svint8_t passthru, svint8_t zn, svuint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_s16u11__SVInt16_tu11__SVInt16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_s16u11__SVInt16_tS_u12__SVUint16_t
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -123,7 +123,7 @@ svint16_t test_svtbxq_s16(svint16_t passthru, svint16_t zn, svuint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_s32u11__SVInt32_tu11__SVInt32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_s32u11__SVInt32_tS_u12__SVUint32_t
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -139,7 +139,7 @@ svint32_t test_svtbxq_s32(svint32_t passthru, svint32_t zn, svuint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_s64u11__SVInt64_tu11__SVInt64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_s64u11__SVInt64_tS_u12__SVUint64_t
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -155,7 +155,7 @@ svint64_t test_svtbxq_s64(svint64_t passthru, svint64_t zn, svuint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svtbxq_f16u13__SVFloat16_tu13__SVFloat16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svtbxq_f16u13__SVFloat16_tS_u12__SVUint16_t
// CPP-CHECK-SAME: (<vscale x 8 x half> [[PASSTHRU:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -171,7 +171,7 @@ svfloat16_t test_svtbxq_f16(svfloat16_t passthru, svfloat16_t zn, svuint16_t zm)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svtbxq_f32u13__SVFloat32_tu13__SVFloat32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svtbxq_f32u13__SVFloat32_tS_u12__SVUint32_t
// CPP-CHECK-SAME: (<vscale x 4 x float> [[PASSTHRU:%.*]], <vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -187,7 +187,7 @@ svfloat32_t test_svtbxq_f32(svfloat32_t passthru, svfloat32_t zn, svuint32_t zm)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svtbxq_f64u13__SVFloat64_tu13__SVFloat64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svtbxq_f64u13__SVFloat64_tS_u12__SVUint64_t
// CPP-CHECK-SAME: (<vscale x 2 x double> [[PASSTHRU:%.*]], <vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -203,7 +203,7 @@ svfloat64_t test_svtbxq_f64(svfloat64_t passthru, svfloat64_t zn, svuint64_t zm)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtbxq_bf16u14__SVBFloat16_tu14__SVBFloat16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtbxq_bf16u14__SVBfloat16_tS_u12__SVUint16_t
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[PASSTHRU:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
index 0773f8d8d01966a..b0e2e4b9c7a5981 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -1,14 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
@@ -26,7 +26,7 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_u8u11__SVUint8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -42,7 +42,7 @@ svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_u16u12__SVUint16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -58,7 +58,7 @@ svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_u32u12__SVUint32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -74,7 +74,7 @@ svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_u64u12__SVUint64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -91,7 +91,7 @@ svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_s8u10__SVInt8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -107,7 +107,7 @@ svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_s16u11__SVInt16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -123,7 +123,7 @@ svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_s32u11__SVInt32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -139,7 +139,7 @@ svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_s64u11__SVInt64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -156,7 +156,7 @@ svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq1_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq1_f16u13__SVFloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
@@ -172,7 +172,7 @@ svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq1_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq1_f32u13__SVFloat32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
@@ -188,7 +188,7 @@ svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq1_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq1_f64u13__SVFloat64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
@@ -204,7 +204,7 @@ svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq1_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq1_bf16u14__SVBfloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
index 9883a7ef21196a3..154b4564f9c6e82 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -1,14 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
@@ -26,7 +26,7 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_u8u11__SVUint8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -42,7 +42,7 @@ svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_u16u12__SVUint16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -58,7 +58,7 @@ svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_u32u12__SVUint32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -74,7 +74,7 @@ svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_u64u12__SVUint64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -90,7 +90,7 @@ svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_s8u10__SVInt8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -106,7 +106,7 @@ svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_s16u11__SVInt16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -122,7 +122,7 @@ svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_s32u11__SVInt32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -138,7 +138,7 @@ svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_s64u11__SVInt64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -155,7 +155,7 @@ svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq2_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq2_f16u13__SVFloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
@@ -171,7 +171,7 @@ svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq2_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq2_f32u13__SVFloat32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
@@ -187,7 +187,7 @@ svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq2_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq2_f64u13__SVFloat64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
@@ -203,7 +203,7 @@ svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq2_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq2_bf16u14__SVBfloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
index c7a1a9b2c227f7a..d96c7eda374d896 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -1,14 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
@@ -26,7 +26,7 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_u8u11__SVUint8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -42,7 +42,7 @@ svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_u16u12__SVUint16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -58,7 +58,7 @@ svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_u32u12__SVUint32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -74,7 +74,7 @@ svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_u64u12__SVUint64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -91,7 +91,7 @@ svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_s8u10__SVInt8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -107,7 +107,7 @@ svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_s16u11__SVInt16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -123,7 +123,7 @@ svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_s32u11__SVInt32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -139,7 +139,7 @@ svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_s64u11__SVInt64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -156,7 +156,7 @@ svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq1_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq1_f16u13__SVFloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
@@ -172,7 +172,7 @@ svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq1_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq1_f32u13__SVFloat32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
@@ -188,7 +188,7 @@ svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq1_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq1_f64u13__SVFloat64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
@@ -204,7 +204,7 @@ svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq1_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq1_bf16u14__SVBfloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
index 220352ece1984b6..17800325bb09901 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -1,14 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
@@ -26,7 +26,7 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_u8u11__SVUint8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -42,7 +42,7 @@ svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_u16u12__SVUint16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -58,7 +58,7 @@ svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_u32u12__SVUint32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -74,7 +74,7 @@ svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_u64u12__SVUint64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -91,7 +91,7 @@ svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_s8u10__SVInt8_tS_
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -107,7 +107,7 @@ svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_s16u11__SVInt16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -123,7 +123,7 @@ svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_s32u11__SVInt32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -139,7 +139,7 @@ svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_s64u11__SVInt64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -156,7 +156,7 @@ svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq2_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq2_f16u13__SVFloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
@@ -172,7 +172,7 @@ svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq2_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq2_f32u13__SVFloat32_tS_
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
@@ -188,7 +188,7 @@ svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq2_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq2_f64u13__SVFloat64_tS_
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
@@ -204,7 +204,7 @@ svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq2_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq2_bf16u14__SVBfloat16_tS_
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
>From 7d172a0d14f7471090c4cf56f0f00ef095c820cc Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 20 Nov 2023 14:42:45 +0000
Subject: [PATCH 3/4] Update zip tests
---
clang/include/clang/Basic/arm_sve.td | 12 ++++----
.../acle_sve2p1_uzpq1.c | 28 +++++++++---------
.../acle_sve2p1_uzpq2.c | 29 ++++++++++---------
.../acle_sve2p1_zipq1.c | 28 +++++++++---------
.../acle_sve2p1_zipq2.c | 28 +++++++++---------
.../acle_sve2p1_imm.cpp | 28 ++++++++++++++++++
6 files changed, 91 insertions(+), 62 deletions(-)
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index c377a0b89c1d591..2a3a373c0de316d 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2007,13 +2007,13 @@ let TargetGuard = "sve2p1" in {
// PMOV
// Move to Pred
multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
- def _LANE : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
- def _ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
+ def _LANE : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
+ def _LANE_ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
}
- defm SVPMOV_B_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
- defm SVPMOV_H_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
- defm SVPMOV_S_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
- defm SVPMOV_D_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
+ defm SVPMOV_B_TO_PRED : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
+ defm SVPMOV_H_TO_PRED : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
+ defm SVPMOV_S_TO_PRED : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
+ defm SVPMOV_D_TO_PRED : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
// Move to Vector
multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
index b0e2e4b9c7a5981..c0fec3951ff58de 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -15,9 +15,9 @@
#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
#else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
#endif
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq1_u8
@@ -33,7 +33,7 @@
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
- return svuzpq1_u8(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_u8)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq1_u16
@@ -49,7 +49,7 @@ svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
- return svuzpq1_u16(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_u16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq1_u32
@@ -65,7 +65,7 @@ svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
- return svuzpq1_u32(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_u32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq1_u64
@@ -81,7 +81,7 @@ svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
- return svuzpq1_u64(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_u64)(zn, zm);
}
@@ -98,7 +98,7 @@ svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
- return svuzpq1_s8(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_s8)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq1_s16
@@ -114,7 +114,7 @@ svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
- return svuzpq1_s16(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_s16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq1_s32
@@ -130,7 +130,7 @@ svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
- return svuzpq1_s32(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_s32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq1_s64
@@ -146,7 +146,7 @@ svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
- return svuzpq1_s64(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_s64)(zn, zm);
}
@@ -163,7 +163,7 @@ svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
- return svuzpq1_f16(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_f16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svuzpq1_f32
@@ -179,7 +179,7 @@ svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
- return svuzpq1_f32(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_f32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svuzpq1_f64
@@ -195,7 +195,7 @@ svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
- return svuzpq1_f64(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_f64)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svuzpq1_bf16
@@ -211,7 +211,7 @@ svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_svuzpq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
- return svuzpq1_bf16(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq1,_bf16)(zn, zm);
}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
index 154b4564f9c6e82..0477ed2c6c28f10 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -15,9 +15,9 @@
#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
#else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
#endif
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_u8
@@ -33,7 +33,7 @@
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
- return svuzpq2_u8(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_u8)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq2_u16
@@ -49,7 +49,7 @@ svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
- return svuzpq2_u16(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_u16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq2_u32
@@ -65,7 +65,7 @@ svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
- return svuzpq2_u32(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_u32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq2_u64
@@ -81,9 +81,10 @@ svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
- return svuzpq2_u64(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_u64)(zn, zm);
}
+
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_s8
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -97,7 +98,7 @@ svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
- return svuzpq2_s8(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_s8)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq2_s16
@@ -113,7 +114,7 @@ svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
- return svuzpq2_s16(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_s16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq2_s32
@@ -129,7 +130,7 @@ svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
- return svuzpq2_s32(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_s32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq2_s64
@@ -145,7 +146,7 @@ svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
- return svuzpq2_s64(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_s64)(zn, zm);
}
@@ -162,7 +163,7 @@ svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
- return svuzpq2_f16(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_f16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svuzpq2_f32
@@ -178,7 +179,7 @@ svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
- return svuzpq2_f32(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_f32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svuzpq2_f64
@@ -194,7 +195,7 @@ svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
- return svuzpq2_f64(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_f64)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svuzpq2_bf16
@@ -210,7 +211,7 @@ svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_svuzpq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
- return svuzpq2_bf16(zn, zm);
+ return SVE_ACLE_FUNC(svuzpq2,_bf16)(zn, zm);
}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
index d96c7eda374d896..06297651471ffd5 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -15,9 +15,9 @@
#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
#else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
#endif
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq1_u8
@@ -33,7 +33,7 @@
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
- return svzipq1_u8(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_u8)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq1_u16
@@ -49,7 +49,7 @@ svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
- return svzipq1_u16(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_u16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq1_u32
@@ -65,7 +65,7 @@ svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
- return svzipq1_u32(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_u32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq1_u64
@@ -81,7 +81,7 @@ svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
- return svzipq1_u64(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_u64)(zn, zm);
}
@@ -98,7 +98,7 @@ svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
- return svzipq1_s8(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_s8)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq1_s16
@@ -114,7 +114,7 @@ svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
- return svzipq1_s16(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_s16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq1_s32
@@ -130,7 +130,7 @@ svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
- return svzipq1_s32(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_s32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq1_s64
@@ -146,7 +146,7 @@ svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
- return svzipq1_s64(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_s64)(zn, zm);
}
@@ -163,7 +163,7 @@ svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
- return svzipq1_f16(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_f16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svzipq1_f32
@@ -179,7 +179,7 @@ svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
- return svzipq1_f32(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_f32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svzipq1_f64
@@ -195,7 +195,7 @@ svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
- return svzipq1_f64(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_f64)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svzipq1_bf16
@@ -211,7 +211,7 @@ svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_svzipq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
- return svzipq1_bf16(zn, zm);
+ return SVE_ACLE_FUNC(svzipq1,_bf16)(zn, zm);
}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
index 17800325bb09901..04cb6c69de6c79a 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -15,9 +15,9 @@
#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
#else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
#endif
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq2_u8
@@ -33,7 +33,7 @@
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
- return svzipq2_u8(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_u8)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq2_u16
@@ -49,7 +49,7 @@ svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
- return svzipq2_u16(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_u16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq2_u32
@@ -65,7 +65,7 @@ svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
- return svzipq2_u32(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_u32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq2_u64
@@ -81,7 +81,7 @@ svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
- return svzipq2_u64(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_u64)(zn, zm);
}
@@ -98,7 +98,7 @@ svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
- return svzipq2_s8(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_s8)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq2_s16
@@ -114,7 +114,7 @@ svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
- return svzipq2_s16(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_s16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq2_s32
@@ -130,7 +130,7 @@ svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
- return svzipq2_s32(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_s32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq2_s64
@@ -146,7 +146,7 @@ svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
- return svzipq2_s64(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_s64)(zn, zm);
}
@@ -163,7 +163,7 @@ svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
- return svzipq2_f16(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_f16)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svzipq2_f32
@@ -179,7 +179,7 @@ svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
- return svzipq2_f32(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_f32)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svzipq2_f64
@@ -195,7 +195,7 @@ svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
- return svzipq2_f64(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_f64)(zn, zm);
}
// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svzipq2_bf16
@@ -211,7 +211,7 @@ svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_svzipq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
- return svzipq2_bf16(zn, zm);
+ return SVE_ACLE_FUNC(svzipq2,_bf16)(zn, zm);
}
diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index 30d60cfe205e7e0..84fdba432c24497 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -122,3 +122,31 @@ void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, sv
svextq_lane_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
svextq_lane_f16(zn_f16, zm_f16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
}
+
+__attribute__((target("+sve2p1")))
+void test_svpmov_lane(){
+ svuint8_t zn_u8;
+ svuint16_t zn_u16;
+ svuint32_t zn_u32;
+ svuint64_t zn_u64;
+ svbool_t pn;
+
+ svpmov_lane_u8(zn_u8, -1); // expected-error {{argument value -1 is outside the valid range [0, 0]}}
+ svpmov_lane_u16(zn_u16, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+ svpmov_lane_u32(zn_u32, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+ svpmov_lane_u64(zn_u64, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+
+ svpmov_lane_u8(zn_u8, 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ svpmov_lane_u16(zn_u16, 3); // expected-error {{argument value 3 is outside the valid range [0, 1]}}
+ svpmov_lane_u32(zn_u32, 5); // expected-error {{argument value 5 is outside the valid range [0, 3]}}
+ svpmov_lane_u64(zn_u64, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+
+
+ zn_u16 = svpmov_lane_u16_m(zn_u16, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 1]}}
+ zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 3]}}
+ zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 7]}}
+
+ zn_u16 = svpmov_lane_u16_m(zn_u16, pn, 3); // expected-error {{argument value 3 is outside the valid range [1, 1]}}
+ zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 5); // expected-error {{argument value 5 is outside the valid range [1, 3]}}
+ zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 8); // expected-error {{argument value 8 is outside the valid range [1, 7]}}
+}
>From 54bc8bb0d11bbeab77fef494334fce0f90a17882 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 20 Nov 2023 17:45:02 +0000
Subject: [PATCH 4/4] Fix pmov to take a uint64_t constant for the lane
---
clang/include/clang/Basic/arm_sve.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 2a3a373c0de316d..59dc10c884b19e2 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2007,7 +2007,7 @@ let TargetGuard = "sve2p1" in {
// PMOV
// Move to Pred
multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
- def _LANE : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
+ def _LANE : Inst<name # "_lane[_{d}]", "Pdi", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
def _LANE_ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
}
defm SVPMOV_B_TO_PRED : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
@@ -2017,7 +2017,7 @@ let TargetGuard = "sve2p1" in {
// Move to Vector
multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
- def _M : SInst<name # "_lane[_{d}]", "ddPk", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
+ def _M : SInst<name # "_lane[_{d}]", "ddPi", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
def _Z : SInst<name # "_{d}_z", "dP", types, MergeNone, intrinsic # "_zeroing", flags, []>;
}
def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP", "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [], []>;
More information about the cfe-commits
mailing list