[clang] 67e4330 - [sve][acle] Implement some of the C intrinsics for brain float.
Francesco Petrogalli via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 29 09:10:21 PDT 2020
Author: Francesco Petrogalli
Date: 2020-06-29T16:09:08Z
New Revision: 67e4330facfbf798ecc40cd2449f70e6758078b9
URL: https://github.com/llvm/llvm-project/commit/67e4330facfbf798ecc40cd2449f70e6758078b9
DIFF: https://github.com/llvm/llvm-project/commit/67e4330facfbf798ecc40cd2449f70e6758078b9.diff
LOG: [sve][acle] Implement some of the C intrinsics for brain float.
Summary:
The following intrinsics have been extended to support brain float types:
svbfloat16_t svclasta[_bf16](svbool_t pg, svbfloat16_t fallback, svbfloat16_t data)
bfloat16_t svclasta[_n_bf16](svbool_t pg, bfloat16_t fallback, svbfloat16_t data)
bfloat16_t svlasta[_bf16](svbool_t pg, svbfloat16_t op)
svbfloat16_t svclastb[_bf16](svbool_t pg, svbfloat16_t fallback, svbfloat16_t data)
bfloat16_t svclastb[_n_bf16](svbool_t pg, bfloat16_t fallback, svbfloat16_t data)
bfloat16_t svlastb[_bf16](svbool_t pg, svbfloat16_t op)
svbfloat16_t svdup[_n]_bf16(bfloat16_t op)
svbfloat16_t svdup[_n]_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op)
svbfloat16_t svdup[_n]_bf16_x(svbool_t pg, bfloat16_t op)
svbfloat16_t svdup[_n]_bf16_z(svbool_t pg, bfloat16_t op)
svbfloat16_t svdupq[_n]_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3, bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7)
svbfloat16_t svdupq_lane[_bf16](svbfloat16_t data, uint64_t index)
svbfloat16_t svinsr[_n_bf16](svbfloat16_t op1, bfloat16_t op2)
Reviewers: sdesmalen, kmclaughlin, c-rhodes, ctetreau, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D82345
Added:
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c
Modified:
clang/include/clang/Basic/arm_sve.td
clang/lib/CodeGen/CGBuiltin.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
llvm/test/CodeGen/AArch64/sve-vector-splat.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 5a0989640eae..1d1b622349ed 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -725,13 +725,23 @@ def SVADRD : SInst<"svadrd[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "
def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "dssssssssssssssss", "cUc", MergeNone>;
def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "dssssssss", "sUsh", MergeNone>;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+ def SVDUPQ_BF16 : SInst<"svdupq[_n]_{d}", "dssssssss", "b", MergeNone>;
+}
def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "dssss", "iUif", MergeNone>;
def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>;
-def SVDUP : SInst<"svdup[_n]_{d}", "ds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dup_x">;
-def SVDUP_M : SInst<"svdup[_n]_{d}", "ddPs", "csilUcUsUiUlhfd", MergeOp1, "aarch64_sve_dup">;
-def SVDUP_X : SInst<"svdup[_n]_{d}", "dPs", "csilUcUsUiUlhfd", MergeAnyExp, "aarch64_sve_dup">;
-def SVDUP_Z : SInst<"svdup[_n]_{d}", "dPs", "csilUcUsUiUlhfd", MergeZeroExp, "aarch64_sve_dup">;
+multiclass svdup_base<string n, string p, MergeType mt, string i> {
+ def NAME : SInst<n, p, "csilUcUsUiUlhfd", mt, i>;
+ let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+ def _BF16: SInst<n, p, "b", mt, i>;
+ }
+}
+
+defm SVDUP : svdup_base<"svdup[_n]_{d}", "ds", MergeNone, "aarch64_sve_dup_x">;
+defm SVDUP_M : svdup_base<"svdup[_n]_{d}", "ddPs", MergeOp1, "aarch64_sve_dup">;
+defm SVDUP_X : svdup_base<"svdup[_n]_{d}", "dPs", MergeAnyExp, "aarch64_sve_dup">;
+defm SVDUP_Z : svdup_base<"svdup[_n]_{d}", "dPs", MergeZeroExp, "aarch64_sve_dup">;
def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index">;
@@ -850,8 +860,11 @@ defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">;
def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
-def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">;
+def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+ def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr">;
+}
////////////////////////////////////////////////////////////////////////////////
// Integer reductions
@@ -1173,10 +1186,18 @@ def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch6
////////////////////////////////////////////////////////////////////////////////
// Permutations and selection
-def SVCLASTA : SInst<"svclasta[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clasta">;
-def SVCLASTA_N : SInst<"svclasta[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clasta_n">;
-def SVCLASTB : SInst<"svclastb[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb">;
-def SVCLASTB_N : SInst<"svclastb[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb_n">;
+multiclass SVEPerm<string name, string proto, string i> {
+ def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i>;
+ let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+ def: SInst<name, proto, "b", MergeNone, i>;
+ }
+}
+
+defm SVCLASTA : SVEPerm<"svclasta[_{d}]", "dPdd", "aarch64_sve_clasta">;
+defm SVCLASTA_N : SVEPerm<"svclasta[_n_{d}]", "sPsd", "aarch64_sve_clasta_n">;
+defm SVCLASTB : SVEPerm<"svclastb[_{d}]", "dPdd", "aarch64_sve_clastb">;
+defm SVCLASTB_N : SVEPerm<"svclastb[_n_{d}]", "sPsd", "aarch64_sve_clastb_n">;
+
def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">;
// Note: svdup_lane is implemented using the intrinsic for TBL to represent a
// splat of any possible lane. It is upto LLVM to pick a more efficient
@@ -1184,9 +1205,12 @@ def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNo
// instruction's immediate.
def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;
def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+ def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane">;
+}
def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>;
-def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">;
-def SVLASTB : SInst<"svlastb[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">;
+defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">;
+defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">;
def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev">;
def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">;
def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b5c4841578c4..0a3adf6fbf24 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8386,6 +8386,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
case SVE::BI__builtin_sve_svdupq_n_s64:
case SVE::BI__builtin_sve_svdupq_n_u16:
case SVE::BI__builtin_sve_svdupq_n_f16:
+ case SVE::BI__builtin_sve_svdupq_n_bf16:
case SVE::BI__builtin_sve_svdupq_n_s16:
case SVE::BI__builtin_sve_svdupq_n_u32:
case SVE::BI__builtin_sve_svdupq_n_f32:
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
new file mode 100644
index 000000000000..cf8e829a8819
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
@@ -0,0 +1,36 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) {
+ // CHECK-LABEL: test_svclasta_bf16
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %fallback, <vscale x 8 x bfloat> %data)
+ // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svclasta_bf16'}}
+ return SVE_ACLE_FUNC(svclasta, _bf16, , )(pg, fallback, data);
+}
+
+bfloat16_t test_svclasta_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
+ // CHECK-LABEL: test_svclasta_n_bf16
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %[[PG]], bfloat %fallback, <vscale x 8 x bfloat> %data)
+ // CHECK: ret bfloat %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svclasta_n_bf16'}}
+ return SVE_ACLE_FUNC(svclasta, _n_bf16, , )(pg, fallback, data);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
new file mode 100644
index 000000000000..3dd84dc705fc
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
@@ -0,0 +1,36 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) {
+ // CHECK-LABEL: test_svclastb_bf16
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %fallback, <vscale x 8 x bfloat> %data)
+ // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svclastb_bf16'}}
+ return SVE_ACLE_FUNC(svclastb, _bf16, , )(pg, fallback, data);
+}
+
+bfloat16_t test_svclastb_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
+ // CHECK-LABEL: test_svclastb_n_bf16
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %[[PG]], bfloat %fallback, <vscale x 8 x bfloat> %data)
+ // CHECK: ret bfloat %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svclastb_n_bf16'}}
+ return SVE_ACLE_FUNC(svclastb, _n_bf16, , )(pg, fallback, data);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
new file mode 100644
index 000000000000..4e0508f39412
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
@@ -0,0 +1,53 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svdup_n_bf16(bfloat16_t op) {
+ // CHECK-LABEL: test_svdup_n_bf16
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %op)
+ // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svdup_n_bf16'}}
+ return SVE_ACLE_FUNC(svdup, _n, _bf16, )(op);
+}
+
+svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op) {
+ // CHECK-LABEL: test_svdup_n_bf16_z
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %[[PG]], bfloat %op)
+ // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svdup_n_bf16_z'}}
+ return SVE_ACLE_FUNC(svdup, _n, _bf16_z, )(pg, op);
+}
+
+svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op) {
+ // CHECK-LABEL: test_svdup_n_bf16_m
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %[[PG]], bfloat %op)
+ // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svdup_n_bf16_m'}}
+ return SVE_ACLE_FUNC(svdup, _n, _bf16_m, )(inactive, pg, op);
+}
+
+svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) {
+ // CHECK-LABEL: test_svdup_n_bf16_x
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %[[PG]], bfloat %op)
+ // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svdup_n_bf16_x'}}
+ return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
new file mode 100644
index 000000000000..2b3603242eed
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
@@ -0,0 +1,42 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index) {
+ // CHECK-LABEL: test_svdupq_lane_bf16
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %data, i64 %index)
+ // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svdupq_lane_bf16'}}
+ return SVE_ACLE_FUNC(svdupq_lane, _bf16, , )(data, index);
+}
+svbfloat16_t test_svdupq_n_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3,
+ bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7) {
+ // CHECK-LABEL: test_svdupq_n_bf16
+ // CHECK: %[[ALLOCA:.*]] = alloca [8 x bfloat], align 16
+ // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x bfloat], [8 x bfloat]* %[[ALLOCA]], i64 0, i64 0
+ // CHECK-DAG: store bfloat %x0, bfloat* %[[BASE]], align 16
+ // <assume other stores>
+ // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x bfloat], [8 x bfloat]* %[[ALLOCA]], i64 0, i64 7
+ // CHECK: store bfloat %x7, bfloat* %[[GEP]], align 2
+ // CHECK-NOT: store
+ // CHECK: call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ // CHECK: %[[LOAD:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %{{.*}}, bfloat* nonnull %[[BASE]])
+ // CHECK: ret <vscale x 8 x bfloat> %[[LOAD]]
+ // expected-warning at +1 {{implicit declaration of function 'svdupq_n_bf16'}}
+ return SVE_ACLE_FUNC(svdupq, _n, _bf16, )(x0, x1, x2, x3, x4, x5, x6, x7);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
new file mode 100644
index 000000000000..30d65995fbda
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
@@ -0,0 +1,26 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svinsr_n_bf16(svbfloat16_t op1, bfloat16_t op2) {
+ // CHECK-LABEL: test_svinsr_n_bf16
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %op1, bfloat %op2)
+ // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svinsr_n_bf16'}}
+ return SVE_ACLE_FUNC(svinsr, _n_bf16, , )(op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c
new file mode 100644
index 000000000000..cb02d2dc60ad
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c
@@ -0,0 +1,27 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+bfloat16_t test_svlasta_bf16(svbool_t pg, svbfloat16_t op) {
+ // CHECK-LABEL: test_svlasta_bf16
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
+ // CHECK: ret bfloat %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svlasta_bf16'}}
+ return SVE_ACLE_FUNC(svlasta, _bf16, , )(pg, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c
new file mode 100644
index 000000000000..5c8abb55a077
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c
@@ -0,0 +1,27 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+bfloat16_t test_svlastb_bf16(svbool_t pg, svbfloat16_t op) {
+ // CHECK-LABEL: test_svlastb_bf16
+ // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ // CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
+ // CHECK: ret bfloat %[[INTRINSIC]]
+ // expected-warning at +1 {{implicit declaration of function 'svlastb_bf16'}}
+ return SVE_ACLE_FUNC(svlastb, _bf16, , )(pg, op);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d3a1c2789cfe..0344aad85030 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -423,6 +423,11 @@ let Predicates = [HasSVE] in {
defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>;
defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : Pat<(nxv8bf16 (AArch64dup_mt nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)),
+ (CPY_ZPmV_H $passthru, $pg, $splat)>;
+ }
+
// Duplicate FP scalar into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
@@ -436,6 +441,10 @@ let Predicates = [HasSVE] in {
(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
(DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+ }
// Duplicate +0.0 into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
@@ -444,6 +453,9 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ }
// Duplicate Int immediate into all vector elements
def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
@@ -486,6 +498,10 @@ let Predicates = [HasSVE] in {
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64insr, nxv8bf16, bf16, INSR_ZV_H>;
+ }
+
defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>;
defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>;
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
@@ -554,11 +570,23 @@ let Predicates = [HasSVE] in {
defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>;
defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_3_Op_Pat<bf16, AArch64clasta_n, nxv8i1, bf16, nxv8bf16, CLASTA_VPZ_H>;
+ def : SVE_3_Op_Pat<bf16, AArch64clastb_n, nxv8i1, bf16, nxv8bf16, CLASTB_VPZ_H>;
+ def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clasta, nxv8i1, nxv8bf16, nxv8bf16, CLASTA_ZPZ_H>;
+ def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clastb, nxv8i1, nxv8bf16, nxv8bf16, CLASTB_ZPZ_H>;
+ }
+
defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>;
defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>;
defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>;
defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_2_Op_Pat<bf16, AArch64lasta, nxv8i1, nxv8bf16, LASTA_VPZ_H>;
+ def : SVE_2_Op_Pat<bf16, AArch64lastb, nxv8i1, nxv8bf16, LASTB_VPZ_H>;
+ }
+
// continuous load with reg+immediate
defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;
@@ -1499,6 +1527,13 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+
+ }
+
+ let Predicates = [IsLE, HasBF16, HasSVE] in {
+ def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
}
let Predicates = [IsLE, HasSVE, HasBF16] in {
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
index d6f240a15bed..788592c131bc 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -81,6 +81,14 @@ define <vscale x 8 x half> @dup_f16(half %b) {
ret <vscale x 8 x half> %out
}
+define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
+; CHECK-LABEL: dup_bf16:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
define <vscale x 8 x half> @dup_imm_f16(half %b) {
; CHECK-LABEL: dup_imm_f16:
; CHECK: mov z0.h, #16.00000000
@@ -126,5 +134,9 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
index e5866c1b68ff..433a1cdfd8e9 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -57,6 +57,16 @@ define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
ret <vscale x 8 x half> %out
}
+define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: clasta_bf16:
+; CHECK: clasta z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x bfloat> %a,
+ <vscale x 8 x bfloat> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:
; CHECK: clasta z0.s, p0, z0.s, z1.s
@@ -131,6 +141,16 @@ define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b
ret half %out
}
+define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: clasta_n_bf16:
+; CHECK: clasta h0, p0, h0, z1.h
+; CHECK-NEXT: ret
+ %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
+ bfloat %a,
+ <vscale x 8 x bfloat> %b)
+ ret bfloat %out
+}
+
define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:
; CHECK: clasta s0, p0, s0, z1.s
@@ -205,6 +225,16 @@ define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
ret <vscale x 8 x half> %out
}
+define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: clastb_bf16:
+; CHECK: clastb z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x bfloat> %a,
+ <vscale x 8 x bfloat> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:
; CHECK: clastb z0.s, p0, z0.s, z1.s
@@ -279,6 +309,16 @@ define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b
ret half %out
}
+define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: clastb_n_bf16:
+; CHECK: clastb h0, p0, h0, z1.h
+; CHECK-NEXT: ret
+ %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
+ bfloat %a,
+ <vscale x 8 x bfloat> %b)
+ ret bfloat %out
+}
+
define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:
; CHECK: clastb s0, p0, s0, z1.s
@@ -343,6 +383,14 @@ define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
ret <vscale x 8 x half> %out
}
+define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: dupq_bf16:
+; CHECK: mov z0.q, q0
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
+ ret <vscale x 8 x bfloat> %out
+}
+
define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:
; CHECK: mov z0.q, z0.q[1]
@@ -432,6 +480,20 @@ define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
ret <vscale x 8 x half> %out
}
+; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
+define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
+; CHECK-LABEL: dupq_lane_bf16:
+; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
+; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
+; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
+; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
+; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
+; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
+ ret <vscale x 8 x bfloat> %out
+}
+
; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:
@@ -605,6 +667,15 @@ define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
ret half %res
}
+define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: lasta_bf16
+; CHECK: lasta h0, p0, z0.h
+; CHECK-NEXT: ret
+ %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x bfloat> %a)
+ ret bfloat %res
+}
+
define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32
; CHECK: lasta s0, p0, z0.s
@@ -681,6 +752,15 @@ define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
ret half %res
}
+define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: lastb_bf16
+; CHECK: lastb h0, p0, z0.h
+; CHECK-NEXT: ret
+ %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x bfloat> %a)
+ ret bfloat %res
+}
+
define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32
; CHECK: lastb s0, p0, z0.s
@@ -1851,6 +1931,7 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <
declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -1859,6 +1940,7 @@ declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x
declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
@@ -1867,6 +1949,7 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <
declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -1875,6 +1958,7 @@ declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x
declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
@@ -1888,6 +1972,7 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)
@@ -1905,6 +1990,7 @@ declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16
declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
@@ -1914,6 +2000,7 @@ declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16
declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
index a9ff1648f3b1..9f679aa6dc4f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
@@ -57,6 +57,16 @@ define <vscale x 8 x half> @dup_f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %p
ret <vscale x 8 x half> %out
}
+define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, bfloat %b) #0 {
+; CHECK-LABEL: dup_bf16:
+; CHECK: mov z0.h, p0/m, h1
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %a,
+ <vscale x 8 x i1> %pg,
+ bfloat %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, float %b) {
; CHECK-LABEL: dup_f32:
; CHECK: mov z0.s, p0/m, s1
@@ -77,10 +87,41 @@ define <vscale x 2 x double> @dup_f64(<vscale x 2 x double> %a, <vscale x 2 x i1
ret <vscale x 2 x double> %out
}
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_z(<vscale x 8 x i1> %pg, bfloat %op) #0 {
+; CHECK-LABEL: test_svdup_n_bf16_z:
+; CHECK: mov z1.h, #0
+; CHECK: mov z1.h, p0/m, h0
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %pg, bfloat %op)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op) #0 {
+; CHECK-LABEL: test_svdup_n_bf16_m:
+; CHECK: mov z0.h, p0/m, h1
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op)
+ ret <vscale x 8 x bfloat> %out
+}
+
+
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_x(<vscale x 8 x i1> %pg, bfloat %op) #0 {
+; CHECK-LABEL: test_svdup_n_bf16_x:
+; CHECK: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %pg, bfloat %op)
+ ret <vscale x 8 x bfloat> %out
+}
+
declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
index 0333eb79714b..dd884d5577f0 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -165,6 +165,14 @@ define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
ret <vscale x 8 x half> %out
}
+define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0 {
+; CHECK-LABEL: insr_bf16:
+; CHECK: insr z0.h, h1
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
; CHECK-LABEL: insr_f32:
; CHECK: insr z0.s, s1
@@ -348,6 +356,7 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i1
declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat>, bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)
@@ -368,3 +377,6 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vsc
declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 5bacbee042c0..af43f8fc97e9 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -172,6 +172,15 @@ define <vscale x 16 x i1> @sve_splat_16xi1(i1 %val) {
;; Splats of legal floating point vector types
+define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
+; CHECK-LABEL: splat_nxv8bf16:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = insertelement <vscale x 8 x bfloat> undef, bfloat %val, i32 0
+ %2 = shufflevector <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x bfloat> %2
+}
+
define <vscale x 8 x half> @splat_nxv8f16(half %val) {
; CHECK-LABEL: splat_nxv8f16:
; CHECK: mov z0.h, h0
@@ -233,6 +242,13 @@ define <vscale x 8 x half> @splat_nxv8f16_zero() {
ret <vscale x 8 x half> zeroinitializer
}
+define <vscale x 8 x bfloat> @splat_nxv8bf16_zero() #0 {
+; CHECK-LABEL: splat_nxv8bf16_zero:
+; CHECK: mov z0.h, #0
+; CHECK-NEXT: ret
+ ret <vscale x 8 x bfloat> zeroinitializer
+}
+
define <vscale x 4 x half> @splat_nxv4f16_zero() {
; CHECK-LABEL: splat_nxv4f16_zero:
; CHECK: mov z0.h, #0
@@ -321,3 +337,6 @@ define <vscale x 2 x double> @splat_nxv2f64_imm() {
%2 = shufflevector <vscale x 2 x double> %1, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
ret <vscale x 2 x double> %2
}
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
More information about the cfe-commits
mailing list