[clang] [AArch64] Implement reinterpret builtins for SVE vector tuples (PR #69598)

Thu Oct 19 05:32:45 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-clang-codegen

Author: Momchil Velikov (momchil-velikov)

<details>
<summary>Changes</summary>

This patch adds reinterpret builtins in the form

    sv<dst>x<N>_t svreinterpret_<dst>_<src>_x<N>(sv<src>x<N>_t op)

where
  - <src> and <dst> designate the source and the destination type, respectively, all pairs chosen from {s8, u8, s16, u16, s32, u32, s64, u64, bf16, f16, f32, f64}
  - <N> designates the number of tuple elements, 2 or 4

A short (overloaded) form is also provided, where the destination type is explicitly designated and the source type is deduced from the parameter type. These take the form

    sv<dst>x<N>_t svreinterpret_<dst>(sv<src>x<N>_t op)

For example:

    svuint16x2_t svreinterpret_u16_s32_x2(svint32x2_t op);
    svuint16x2_t svreinterpret_u16(svint32x2_t op);

---

Patch is 241.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69598.diff


4 Files Affected:

- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+1-1) 
- (modified) clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c (+526-49) 
- (modified) clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c (+2729-245) 
- (modified) clang/utils/TableGen/SveEmitter.cpp (+74-27) 


``````````diff

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2b341b8090fad7d..2cff27dacd4b1c4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9903,7 +9903,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
 
   llvm::Type *Ty = ConvertType(E->getType());
   if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
-      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
+      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
     Value *Val = EmitScalarExpr(E->getArg(0));
     return EmitSVEReinterpret(Val, Ty);
   }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c
index 489434bea2691c9..c66695bb16eabdd 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c
@@ -1,19 +1,40 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DTUPLE=x2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE2
+// RUN: %clang_cc1 -DTUPLE=x4 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE4
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DTUPLE=x2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE2
+// RUN: %clang_cc1 -DTUPLE=x4 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE4
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -DTUPLE=x2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE2
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -DTUPLE=x4 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE4
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -DTUPLE=x2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE2
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -DTUPLE=x4 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE4
 
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#ifdef TUPLE
+#define TYPE_1(base,tuple) base ## tuple ## _t
+#define TYPE_0(base,tuple) TYPE_1(base,tuple)
+#define TYPE(base) TYPE_0(base,TUPLE)
+#else
+#define TYPE(base) base ## _t
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
-// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#ifdef TUPLE
+#define SVE_ACLE_FUNC_1(A1,A2,T) A1##A2##_##T
+#define SVE_ACLE_FUNC_0(A1,A2,T) SVE_ACLE_FUNC_1(A1,A2,T)
+#define SVE_ACLE_FUNC(A1,A2) SVE_ACLE_FUNC_0(A1,A2,TUPLE)
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
 #endif
 
 // CHECK-LABEL: @test_svreinterpret_s8_bf16(
@@ -21,13 +42,33 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 16 x i8>
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_s8_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 32 x i8>
+// TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_s8_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 64 x i8>
+// TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s8_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 16 x i8>
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svreinterpret_s8_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_s8, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 32 x i8>
+// CPP-TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 64 x i8>
+// CPP-TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+//
+TYPE(svint8) test_svreinterpret_s8_bf16(TYPE(svbfloat16) op) {
+  return SVE_ACLE_FUNC(svreinterpret_s8, _bf16)(op);
 }
 
 // CHECK-LABEL: @test_svreinterpret_s16_bf16(
@@ -35,13 +76,33 @@ svint8_t test_svreinterpret_s8_bf16(svbfloat16_t op) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 8 x i16>
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_s16_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 16 x i16>
+// TUPLE2-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_s16_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 32 x i16>
+// TUPLE4-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z27test_svreinterpret_s16_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svreinterpret_s16_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_s16, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 16 x i16>
+// CPP-TUPLE2-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 32 x i16>
+// CPP-TUPLE4-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
+//
+TYPE(svint16) test_svreinterpret_s16_bf16(TYPE(svbfloat16) op) {
+  return SVE_ACLE_FUNC(svreinterpret_s16, _bf16)(op);
 }
 
 // CHECK-LABEL: @test_svreinterpret_s32_bf16(
@@ -49,26 +110,66 @@ svint16_t test_svreinterpret_s16_bf16(svbfloat16_t op) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_s32_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 8 x i32>
+// TUPLE2-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_s32_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 16 x i32>
+// TUPLE4-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z27test_svreinterpret_s32_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svreinterpret_s32_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_s32, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 8 x i32>
+// CPP-TUPLE2-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 16 x i32>
+// CPP-TUPLE4-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
+//
+TYPE(svint32) test_svreinterpret_s32_bf16(TYPE(svbfloat16) op) {
+  return SVE_ACLE_FUNC(svreinterpret_s32, _bf16)(op);
 }
 // CHECK-LABEL: @test_svreinterpret_s64_bf16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_s64_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 4 x i64>
+// TUPLE2-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_s64_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 8 x i64>
+// TUPLE4-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z27test_svreinterpret_s64_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svreinterpret_s64_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_s64, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 4 x i64>
+// CPP-TUPLE2-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 8 x i64>
+// CPP-TUPLE4-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+TYPE(svint64) test_svreinterpret_s64_bf16(TYPE(svbfloat16) op) {
+  return SVE_ACLE_FUNC(svreinterpret_s64, _bf16)(op);
 }
 
 // CHECK-LABEL: @test_svreinterpret_u8_bf16(
@@ -76,13 +177,33 @@ svint64_t test_svreinterpret_s64_bf16(svbfloat16_t op) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 16 x i8>
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_u8_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 32 x i8>
+// TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_u8_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 64 x i8>
+// TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u8_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 16 x i8>
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svreinterpret_u8_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_u8, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 32 x i8>
+// CPP-TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 64 x i8>
+// CPP-TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+//
+TYPE(svuint8) test_svreinterpret_u8_bf16(TYPE(svbfloat16) op) {
+  return SVE_ACLE_FUNC(svreinterpret_u8, _bf16)(op);
 }
 
 // CHECK-LABEL: @test_svreinterpret_u16_bf16(
@@ -90,13 +211,33 @@ svuint8_t test_svreinterpret_u8_bf16(svbfloat16_t op) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 8 x i16>
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_u16_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 16 x i16>
+// TUPLE2-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_u16_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 32 x i16>
+// TUPLE4-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z27test_svreinterpret_u16_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svreinterpret_u16_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_u16, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 16 x i16>
+// CPP-TUPLE2-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 32 x i16>
+// CPP-TUPLE4-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
+//
+TYPE(svuint16) test_svreinterpret_u16_bf16(TYPE(svbfloat16) op) {
+  return SVE_ACLE_FUNC(svreinterpret_u16, _bf16)(op);
 }
 
 // CHECK-LABEL: @test_svreinterpret_u32_bf16(
@@ -104,13 +245,33 @@ svuint16_t test_svreinterpret_u16_bf16(svbfloat16_t op) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_u32_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 8 x i32>
+// TUPLE2-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_u32_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 16 x i32>
+// TUPLE4-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z27test_svreinterpret_u32_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svreinterpret_u32_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_u32, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 8 x i32>
+// CPP-TUPLE2-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 16 x i32>
+// CPP-TUPLE4-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
+//
+TYPE(svuint32) test_svreinterpret_u32_bf16(TYPE(svbfloat16) op) {
+  return SVE_ACLE_FUNC(svreinterpret_u32, _bf16)(op);
 }
 
 // CHECK-LABEL: @test_svreinterpret_u64_bf16(
@@ -118,13 +279,33 @@ svuint32_t test_svreinterpret_u32_bf16(svbfloat16_t op) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_u64_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 4 x i64>
+// TUPLE2-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_u64_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 8 x i64>
+// TUPLE4-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z27test_svreinterpret_u64_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svreinterpret_u64_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_u64, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 4 x i64>
+// CPP-TUPLE2-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 8 x i64>
+// CPP-TUPLE4-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+TYPE(svuint64) test_svreinterpret_u64_bf16(TYPE(svbfloat16) op) {
+  return SVE_ACLE_FUNC(svreinterpret_u64, _bf16)(op);
 }
 
 // CHECK-LABEL: @test_svreinterpret_bf16_s8(
@@ -132,13 +313,33 @@ svuint64_t test_svreinterpret_u64_bf16(svbfloat16_t op) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i8> [[OP:%.*]] to <vscale x 8 x bfloat>
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_bf16_s8(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x i8> [[OP:%.*]] to <vscale x 16 x bfloat>
+// TUPLE2-NEXT:    ret <vscale x 16 x bfloat> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_bf16_s8(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 64 x i8> [[OP:%.*]] to <vscale x 32 x bfloat>
+// TUPLE4-NEXT:    ret <vscale x 32 x bfloat> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z26test_svreinterpret_bf16_s8u10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i8> [[OP:%.*]] to <vscale x 8 x bfloat>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svreinterpret_bf16_s8(svint8_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_bf16, _s8, , )(op);
+// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x i8> [[OP:%.*]] to <vscale x 16 x bfloat>
+// CPP-TUPLE2-NEXT:    ret <vscale x 16 x bfloat> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 6...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/69598