[clang] e73c3bb - [AArch64][SVE] Add bfloat16 to outstanding tuple vector intrinsics

Mon Jun 29 10:01:13 PDT 2020

Author: Cullen Rhodes
Date: 2020-06-29T17:00:58Z
New Revision: e73c3bb06b5a35d13cf96d574ce3b849c5d3d56d

URL: https://github.com/llvm/llvm-project/commit/e73c3bb06b5a35d13cf96d574ce3b849c5d3d56d
DIFF: https://github.com/llvm/llvm-project/commit/e73c3bb06b5a35d13cf96d574ce3b849c5d3d56d.diff

LOG: [AArch64][SVE] Add bfloat16 to outstanding tuple vector intrinsics

Summary:
* svget2/3/4
* svset2/3/4
* svcreate2/3/4
* svundef/2/3/4

Reviewers: sdesmalen, kmclaughlin, fpetrogalli, efriedma

Reviewed By: fpetrogalli

Differential Revision: https://reviews.llvm.org/D82665

Added: 
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c

Modified: 
    clang/include/clang/Basic/arm_sve.td
    llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 1d1b622349ed..0711293c4f8a 100644

--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1439,6 +1439,16 @@ def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd",   "csilUcUsUiUlhfd", MergeNone,
 def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create3", [IsTupleCreate]>;
 def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create4", [IsTupleCreate]>;
 
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+def SVUNDEF_1_BF16 : SInst<"svundef_{d}",  "d", "b", MergeNone, "", [IsUndef]>;
+def SVUNDEF_2_BF16 : SInst<"svundef2_{d}", "2", "b", MergeNone, "", [IsUndef]>;
+def SVUNDEF_3_BF16 : SInst<"svundef3_{d}", "3", "b", MergeNone, "", [IsUndef]>;
+def SVUNDEF_4_BF16 : SInst<"svundef4_{d}", "4", "b", MergeNone, "", [IsUndef]>;
+
+def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd",   "b", MergeNone, "aarch64_sve_tuple_create2", [IsTupleCreate]>;
+def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd",  "b", MergeNone, "aarch64_sve_tuple_create3", [IsTupleCreate]>;
+def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "aarch64_sve_tuple_create4", [IsTupleCreate]>;
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector insertion and extraction
@@ -1450,6 +1460,16 @@ def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "aarch
 def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>;
 def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
 
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>;
+def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>;
+def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>;
+
+def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>;
+def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>;
+def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 WhileGE/GT
 let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
new file mode 100644
index 000000000000..b8b0630981c5
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1)
+{
+  // CHECK-LABEL: test_svcreate2_bf16
+  // CHECK: %[[CREATE:.*]] = call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %x0, <vscale x 8 x bfloat> %x1)
+  // CHECK-NEXT: ret <vscale x 16 x bfloat> %[[CREATE]]
+  // expected-warning at +1 {{implicit declaration of function 'svcreate2_bf16'}}
+  return SVE_ACLE_FUNC(svcreate2,_bf16,,)(x0, x1);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
new file mode 100644
index 000000000000..cb5160e76fc8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2)
+{
+  // CHECK-LABEL: test_svcreate3_bf16
+  // CHECK: %[[CREATE:.*]] = call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %x0, <vscale x 8 x bfloat> %x1, <vscale x 8 x bfloat> %x2)
+  // CHECK-NEXT: ret <vscale x 24 x bfloat> %[[CREATE]]
+  // expected-warning at +1 {{implicit declaration of function 'svcreate3_bf16'}}
+  return SVE_ACLE_FUNC(svcreate3,_bf16,,)(x0, x1, x2);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
new file mode 100644
index 000000000000..42130f5ac502
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4)
+{
+  // CHECK-LABEL: test_svcreate4_bf16
+  // CHECK: %[[CREATE:.*]] = call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %x0, <vscale x 8 x bfloat> %x1, <vscale x 8 x bfloat> %x2, <vscale x 8 x bfloat> %x4)
+  // CHECK-NEXT: ret <vscale x 32 x bfloat> %[[CREATE]]
+  // expected-warning at +1 {{implicit declaration of function 'svcreate4_bf16'}}
+  return SVE_ACLE_FUNC(svcreate4,_bf16,,)(x0, x1, x2, x4);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
new file mode 100644
index 000000000000..c2ed817ab09a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple)
+{
+  // CHECK-LABEL: test_svget2_bf16_0
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 0)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget2_bf16'}}
+  return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 0);
+}
+
+svbfloat16_t test_svget2_bf16_1(svbfloat16x2_t tuple)
+{
+  // CHECK-LABEL: test_svget2_bf16_1
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 1)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget2_bf16'}}
+  return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 1);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
new file mode 100644
index 000000000000..bfb86ba9b055
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple)
+{
+  // CHECK-LABEL: test_svget3_bf16_0
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 0)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget3_bf16'}}
+  return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 0);
+}
+
+svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple)
+{
+  // CHECK-LABEL: test_svget3_bf16_1
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 1)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget3_bf16'}}
+  return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 1);
+}
+
+svbfloat16_t test_svget3_bf16_2(svbfloat16x3_t tuple)
+{
+  // CHECK-LABEL: test_svget3_bf16_2
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 2)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget3_bf16'}}
+  return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 2);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
new file mode 100644
index 000000000000..96f1a49ece33
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple)
+{
+  // CHECK-LABEL: test_svget4_bf16_0
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 0)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget4_bf16'}}
+  return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 0);
+}
+
+svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple)
+{
+  // CHECK-LABEL: test_svget4_bf16_1
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 1)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget4_bf16'}}
+  return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 1);
+}
+
+svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple)
+{
+  // CHECK-LABEL: test_svget4_bf16_2
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 2)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget4_bf16'}}
+  return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 2);
+}
+
+svbfloat16_t test_svget4_bf16_3(svbfloat16x4_t tuple)
+{
+  // CHECK-LABEL: test_svget4_bf16_3
+  // CHECK: %[[EXT:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 3)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[EXT]]
+  // expected-warning at +1 {{implicit declaration of function 'svget4_bf16'}}
+  return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 3);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
new file mode 100644
index 000000000000..47c7f92974c1
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset2_bf16_0
+  // CHECK: %[[INSERT:.*]] = call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.set.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> %tuple, i32 0, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 16 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset2_bf16'}}
+  return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 0, x);
+}
+
+svbfloat16x2_t test_svset2_bf16_1(svbfloat16x2_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset2_bf16_1
+  // CHECK: %[[INSERT:.*]] = call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.set.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> %tuple, i32 1, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 16 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset2_bf16'}}
+  return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 1, x);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
new file mode 100644
index 000000000000..e1e1a197b668
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+
+svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset3_bf16_0
+  // CHECK: %[[INSERT:.*]] = call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.set.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> %tuple, i32 0, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 24 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset3_bf16'}}
+  return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 0, x);
+}
+
+svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset3_bf16_1
+  // CHECK: %[[INSERT:.*]] = call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.set.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> %tuple, i32 1, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 24 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset3_bf16'}}
+  return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 1, x);
+}
+
+svbfloat16x3_t test_svset3_bf16_2(svbfloat16x3_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset3_bf16_2
+  // CHECK: %[[INSERT:.*]] = call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.set.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> %tuple, i32 2, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 24 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset3_bf16'}}
+  return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 2, x);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
new file mode 100644
index 000000000000..09f52a8ebf43
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+
+svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset4_bf16_0
+  // CHECK: %[[INSERT:.*]] = call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.set.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> %tuple, i32 0, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 32 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset4_bf16'}}
+  return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 0, x);
+}
+
+svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset4_bf16_1
+  // CHECK: %[[INSERT:.*]] = call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.set.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> %tuple, i32 1, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 32 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset4_bf16'}}
+  return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 1, x);
+}
+
+svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset4_bf16_2
+  // CHECK: %[[INSERT:.*]] = call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.set.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> %tuple, i32 2, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 32 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset4_bf16'}}
+  return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 2, x);
+}
+
+svbfloat16x4_t test_svset4_bf16_3(svbfloat16x4_t tuple, svbfloat16_t x)
+{
+  // CHECK-LABEL: test_svset4_bf16_3
+  // CHECK: %[[INSERT:.*]] = call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.set.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> %tuple, i32 3, <vscale x 8 x bfloat> %x)
+  // CHECK-NEXT: ret <vscale x 32 x bfloat> %[[INSERT]]
+  // expected-warning at +1 {{implicit declaration of function 'svset4_bf16'}}
+  return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 3, x);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
new file mode 100644
index 000000000000..07e974ca7179
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+svbfloat16_t test_svundef_bf16()
+{
+  // CHECK-LABEL: test_svundef_bf16
+  // CHECK: ret <vscale x 8 x bfloat> undef
+  // expected-warning at +1 {{implicit declaration of function 'svundef_bf16'}}
+  return svundef_bf16();
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
new file mode 100644
index 000000000000..629b4ae0839f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+svbfloat16x2_t test_svundef2_bf16()
+{
+  // CHECK-LABEL: test_svundef2_bf16
+  // CHECK: ret <vscale x 16 x bfloat> undef
+  // expected-warning at +1 {{implicit declaration of function 'svundef2_bf16'}}
+  return svundef2_bf16();
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
new file mode 100644
index 000000000000..dd577b19c477
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+svbfloat16x3_t test_svundef3_bf16()
+{
+  // CHECK-LABEL: test_svundef3_bf16
+  // CHECK: ret <vscale x 24 x bfloat> undef
+  // expected-warning at +1 {{implicit declaration of function 'svundef3_bf16'}}
+  return svundef3_bf16();
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
new file mode 100644
index 000000000000..49d708d27e8f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+svbfloat16x4_t test_svundef4_bf16()
+{
+  // CHECK-LABEL: test_svundef4_bf16
+  // CHECK: ret <vscale x 32 x bfloat> undef
+  // expected-warning at +1 {{implicit declaration of function 'svundef4_bf16'}}
+  return svundef4_bf16();
+}

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
index 38b05e42b962..2cae21f6c449 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
@@ -93,6 +93,37 @@ L2:
   ret <vscale x 8 x half> %extract
 }
 
+;
+; SVCREATE2 (bfloat)
+;
+
+define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) local_unnamed_addr #1 {
+; CHECK-LABEL: test_svcreate2_bf16_vec0:
+; CHECK: // %L2
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 0)
+  ret <vscale x 8 x bfloat> %extract
+}
+
+define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) local_unnamed_addr #1 {
+; CHECK-LABEL: test_svcreate2_bf16_vec1:
+; CHECK: // %L2
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 1)
+  ret <vscale x 8 x bfloat> %extract
+}
+
 ;
 ; SVCREATE2 (i32)
 ;
@@ -278,6 +309,7 @@ L2:
   %extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %tuple, i32 2)
   ret <vscale x 8 x i16> %extract
 }
+
 ;
 ; SVCREATE3 (half)
 ;
@@ -309,6 +341,36 @@ L2:
   ret <vscale x 8 x half> %extract
 }
 
+;
+; SVCREATE3 (bfloat)
+;
+
+define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) local_unnamed_addr #1 {
+; CHECK-LABEL: test_svcreate3_bf16_vec0:
+; CHECK: // %L2
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 0)
+  ret <vscale x 8 x bfloat> %extract
+}
+
+define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec2(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) local_unnamed_addr #1 {
+; CHECK-LABEL: test_svcreate3_bf16_vec2:
+; CHECK: // %L2
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 2)
+  ret <vscale x 8 x bfloat> %extract
+}
 
 ;
 ; SVCREATE3 (i32)
@@ -527,6 +589,37 @@ L2:
   ret <vscale x 8 x half> %extract
 }
 
+;
+; SVCREATE4 (bfloat)
+;
+
+define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) local_unnamed_addr #1 {
+; CHECK-LABEL: test_svcreate4_bf16_vec0:
+; CHECK: // %L2
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 0)
+  ret <vscale x 8 x bfloat> %extract
+}
+
+define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec3(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) local_unnamed_addr #1 {
+; CHECK-LABEL: test_svcreate4_bf16_vec3:
+; CHECK: // %L2
+; CHECK-NEXT: mov z0.d, z3.d
+; CHECK-NEXT: ret
+  %tuple = tail call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3)
+  br i1 %p, label %L1, label %L2
+L1:
+  ret <vscale x 8 x bfloat> undef
+L2:
+  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 3)
+  ret <vscale x 8 x bfloat> %extract
+}
+
 ;
 ; SVCREATE4 (i32)
 ;
@@ -652,10 +745,13 @@ L2:
 }
 
 attributes #0 = { nounwind "target-features"="+sve" }
+; +bf16 is required for the bfloat version.
+attributes #1 = { nounwind "target-features"="+sve,+bf16" }
 
 declare <vscale x 4 x double>  @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 8 x float>  @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 16 x half>  @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 16 x bfloat>  @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 4 x i64>  @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -664,6 +760,7 @@ declare <vscale x 32 x i8>  @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vsc
 declare <vscale x 6 x double>  @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 6 x i64>  @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -672,6 +769,7 @@ declare <vscale x 48 x i8>  @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vsc
 declare <vscale x 8 x double>  @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64 (<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 16 x float>  @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 32 x half>  @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 32 x bfloat>  @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x  8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -693,6 +791,10 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x
 declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64>, i32 immarg)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64>, i32 immarg)
 
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat>, i32 immarg)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat>, i32 immarg)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat>, i32 immarg)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half>, i32 immarg)
 declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half>, i32 immarg)
 declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half>, i32 immarg)