[clang] [AArch64] Implement reinterpret builtins for SVE vector tuples (PR #69598)

Thu Oct 19 08:55:32 PDT 2023

================
@@ -1,336 +1,813 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DTUPLE=x2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE2
+// RUN: %clang_cc1 -DTUPLE=x4 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE4
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DTUPLE=x2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE2
+// RUN: %clang_cc1 -DTUPLE=x4 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE4
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -DTUPLE=x2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE2
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -DTUPLE=x4 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE4
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -DTUPLE=x2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE2
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -DTUPLE=x4 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE4
 
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#ifdef TUPLE
+#define TYPE_1(base,tuple) base ## tuple ## _t
+#define TYPE_0(base,tuple) TYPE_1(base,tuple)
+#define TYPE(base) TYPE_0(base,TUPLE)
+#else
+#define TYPE(base) base ## _t
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
-// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#ifdef TUPLE
+#define SVE_ACLE_FUNC_1(A1,A2,T) A1##A2##_##T
+#define SVE_ACLE_FUNC_0(A1,A2,T) SVE_ACLE_FUNC_1(A1,A2,T)
+#define SVE_ACLE_FUNC(A1,A2) SVE_ACLE_FUNC_0(A1,A2,TUPLE)
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
 #endif
 
 // CHECK-LABEL: @test_svreinterpret_s8_bf16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 16 x i8>
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
+// TUPLE2-LABEL: @test_svreinterpret_s8_bf16(
+// TUPLE2-NEXT:  entry:
+// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 32 x i8>
+// TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+//
+// TUPLE4-LABEL: @test_svreinterpret_s8_bf16(
+// TUPLE4-NEXT:  entry:
+// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 64 x i8>
+// TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+//
 // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s8_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x bfloat> [[OP:%.*]] to <vscale x 16 x i8>
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svreinterpret_s8_bf16(svbfloat16_t op) {
-  return SVE_ACLE_FUNC(svreinterpret_s8, _bf16, , )(op);
+// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x2_t(
+// CPP-TUPLE2-NEXT:  entry:
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 32 x i8>
+// CPP-TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+//
+// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x4_t(
+// CPP-TUPLE4-NEXT:  entry:
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 64 x i8>
+// CPP-TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+//
+TYPE(svint8) test_svreinterpret_s8_bf16(TYPE(svbfloat16) op) {
----------------
CarolineConcatto wrote:

I was expecting something like svint8x2_t as  output.

https://github.com/llvm/llvm-project/pull/69598