[clang] [llvm] [Clang][LLVM][AArch64] Add intrinsic for MOVT SME2 instruction (PR #97602)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 23 09:07:31 PDT 2024
https://github.com/CarolineConcatto updated https://github.com/llvm/llvm-project/pull/97602
>From 70d1ec0e1c1bd896cf753510a8452325b086430e Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Wed, 3 Jul 2024 15:55:45 +0000
Subject: [PATCH 1/6] [Clang][LLVM][AArch64] Add intrinsic for MOVT SME2
instruction
This patch adds these intrinsics:
// Variants are also available for:
// [_s8], [_u16], [_s16], [_u32], [_s32], [_u64], [_s64]
// [_bf16], [_f16], [_f32], [_f64]
void svwrite_lane_zt[_u8](uint64_t zt0, svuint8_t zt, uint64_t idx) __arm_streaming __arm_inout("zt0");
void svwrite_zt[_u8](uint64_t zt0, svuint8_t zt) __arm_streaming __arm_inout("zt0");
according to PR#324[1]
[1]https://github.com/ARM-software/acle/pull/324
---
clang/include/clang/Basic/arm_sme.td | 5 +
.../acle_sme2_write_lane_zt.c | 401 ++++++++++++++++++
.../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 10 +-
llvm/include/llvm/IR/IntrinsicsAArch64.td | 9 +
.../Target/AArch64/AArch64ISelLowering.cpp | 2 +
.../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +-
llvm/lib/Target/AArch64/SMEInstrFormats.td | 42 +-
.../AArch64/sme2-intrinsics-read-zt.ll | 162 +++++++
8 files changed, 630 insertions(+), 3 deletions(-)
create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-read-zt.ll
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index ce8908f566f2fd..ff68e536e99b09 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -817,4 +817,9 @@ multiclass ZAReadzArray<string vg_num>{
defm SVREADZ_VG2 : ZAReadzArray<"2">;
defm SVREADZ_VG4 : ZAReadzArray<"4">;
+
+let SMETargetGuard = "sme2,sme-lutv2" in {
+ def SVWRITE_LANE_ZT : SInst<"svwrite_lane_zt[_{d}]", "vidi", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_lane_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVWRITE_ZT : SInst<"svwrite_zt[_{d}]", "vid", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
+}
} // let SVETargetGuard = InvalidMode
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
new file mode 100644
index 00000000000000..9bdc3481953a21
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
@@ -0,0 +1,401 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -S -Werror -Wall -o /dev/null %s
+// REQUIRES: aarch64-registered-target
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
+
+#include <arm_sme.h>
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_u8_1(
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z23test_write_lane_zt_u8_1u11__SVUint8_t(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_u8_1(svuint8_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u8)(0, v, 1);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_s8_2(
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]], i32 2)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z23test_write_lane_zt_s8_2u10__SVInt8_t(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]], i32 2)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_s8_2(svint8_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _s8)(0, v, 2);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_u16_3(
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_u16_3u12__SVUint16_t(
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_u16_3(svuint16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u16)(0, v, 1);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_s16_1(
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_s16_1u11__SVInt16_t(
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_s16_1(svint16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _s16)(0, v, 1);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_u32_2(
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]], i32 2)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_u32_2u12__SVUint32_t(
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]], i32 2)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_u32_2(svuint32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u32)(0, v, 2);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_s32_3(
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]], i32 3)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_s32_3u11__SVInt32_t(
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]], i32 3)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_s32_3(svint32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _s32)(0, v, 3);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_u64_0(
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 0)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_u64_0u12__SVUint64_t(
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 0)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_u64_0(svuint64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u64)(0, v, 0);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_s64_1(
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_s64_1u11__SVInt64_t(
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_s64_1(svint64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _s64)(0, v, 1);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_f16_2(
+// CHECK-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]], i32 2)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_f16_2u13__SVFloat16_t(
+// CHECK-CXX-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]], i32 2)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_f16_2(svfloat16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _f16)(0, v, 2);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_bf16_3(
+// CHECK-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]], i32 3)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z25test_write_lane_zt_bf16_3u14__SVBfloat16_t(
+// CHECK-CXX-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]], i32 3)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_bf16_3(svbfloat16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _bf16)(0, v, 3);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_f32_0(
+// CHECK-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 0)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_f32_0u13__SVFloat32_t(
+// CHECK-CXX-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 0)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_f32_0(svfloat32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _f32)(0, v, 0);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_f64_1(
+// CHECK-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2f64(i32 0, <vscale x 2 x double> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_f64_1u13__SVFloat64_t(
+// CHECK-CXX-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2f64(i32 0, <vscale x 2 x double> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_f64_1(svfloat64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _f64)(0, v, 1);
+}
+
+//ALIAS
+// CHECK-LABEL: define dso_local void @test_write_zt_u8(
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z16test_write_zt_u8u11__SVUint8_t(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_u8(svuint8_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _u8)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_s8(
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z16test_write_zt_s8u10__SVInt8_t(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_s8(svint8_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _s8)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_u16(
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u16u12__SVUint16_t(
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_u16(svuint16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _u16)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_s16(
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s16u11__SVInt16_t(
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_s16(svint16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _s16)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_u32(
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u32u12__SVUint32_t(
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_u32(svuint32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _u32)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_s32(
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s32u11__SVInt32_t(
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_s32(svint32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _s32)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_u64(
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u64u12__SVUint64_t(
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_u64(svuint64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _u64)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_s64(
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s64u11__SVInt64_t(
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_s64(svint64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _s64)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_f16(
+// CHECK-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_f16u13__SVFloat16_t(
+// CHECK-CXX-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_f16(svfloat16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _f16)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_bf16(
+// CHECK-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z18test_write_zt_bf16u14__SVBfloat16_t(
+// CHECK-CXX-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_bf16(svbfloat16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _bf16)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_f32(
+// CHECK-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_f32u13__SVFloat32_t(
+// CHECK-CXX-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_f32(svfloat32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _f32)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write__zt_f64(
+// CHECK-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2f64(i32 0, <vscale x 2 x double> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z18test_write__zt_f64u13__SVFloat64_t(
+// CHECK-CXX-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2f64(i32 0, <vscale x 2 x double> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write__zt_f64(svfloat64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _f64)(0, v);
+}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 5de97649af5d3a..d37090c6afb1ff 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
-// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -fsyntax-only -verify %s
+// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sme-lutv2 -fsyntax-only -verify %s
// REQUIRES: aarch64-registered-target
@@ -350,3 +350,11 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16,
svsudot_lane_za32_s8_vg1x2(slice_base, z_s8x2, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}
+
+void test_read_zt() __arm_streaming __arm_inout("zt0") {
+ // Check Zt tile 0
+ svwrite_lane_zt(1, svundef_s8(), 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ svwrite_zt(1, svundef_s8()); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ // Check index
+ svwrite_lane_zt(0, svundef_s8(), 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 6f3694cf952d47..2561497db1043e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2879,6 +2879,15 @@ let TargetPrefix = "aarch64" in {
[llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
+ def int_aarch64_sme_write_lane_zt
+ : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrNoMem, IntrHasSideEffects]>;
+
+ def int_aarch64_sme_write_zt
+ : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty],
+ [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
+
+
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
class SME_OuterProduct_Intrinsic
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e0c3cc5eddb827..f8c01717239360 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3192,6 +3192,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
return EmitZero(MI, BB);
case AArch64::ZERO_T_PSEUDO:
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
+ case AArch64::MOVT_TIZ_PSEUDO:
+ return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 709a98d3a8cb4d..66089047bc07cb 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -935,7 +935,7 @@ defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
} //[HasSME2, HasFAMINMAX]
let Predicates = [HasSME2, HasSME_LUTv2] in {
-defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>;
+defm MOVT_TIZ : sme2_movt_zt_to_zt<"movt", 0b0011111, int_aarch64_sme_write_lane_zt, int_aarch64_sme_write_zt>;
def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
} //[HasSME2, HasSME_LUTv2]
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 77cf5cb56728b9..782169ae2ef1e2 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3278,10 +3278,50 @@ class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
let Inst{4-0} = Zt;
}
-multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
+multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc, SDPatternOperator intrinsic_lane, SDPatternOperator intrinsic> {
def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
+ def NAME # _PSEUDO
+ : Pseudo<(outs), (ins ZTR:$ZT, sme_elm_idx0_3:$off2, ZPRAny:$Zt), []>, Sched<[]> {
+ let usesCustomInserter = 1;
+ }
def : InstAlias<mnemonic # "\t$ZTt, $Zt",
(!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
+
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv16i8:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv8i16:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv4i32:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv2i64:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv8f16:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv4f32:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv2f64:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv8bf16:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
+
+ //Alias intrinsic
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv8i16:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv4i32:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv2i64:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv8f16:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv4f32:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv2f64:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv8bf16:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-read-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-read-zt.ll
new file mode 100644
index 00000000000000..d877eff1d3ad92
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-read-zt.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+
+define void @test_write_zt_i8_0(<vscale x 16 x i8> %zn) #0 {
+; CHECK-LABEL: test_write_zt_i8_0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> %zn, i32 0)
+ ret void
+}
+
+define void @test_write_zt_i8_1(<vscale x 16 x i8> %zn) #0 {
+; CHECK-LABEL: test_write_zt_i8_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0[1, mul vl], z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> %zn, i32 1)
+ ret void
+}
+
+define void @test_write_zt_i16_2(<vscale x 8 x i16> %zn) #0 {
+; CHECK-LABEL: test_write_zt_i16_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0[2, mul vl], z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> %zn, i32 2)
+ ret void
+}
+
+define void @test_write_zt_i32_3(<vscale x 4 x i32> %zn) #0 {
+; CHECK-LABEL: test_write_zt_i32_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0[3, mul vl], z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> %zn, i32 3)
+ ret void
+}
+
+define void @test_write_zt_i64_1(<vscale x 2 x i64> %zn) #0 {
+; CHECK-LABEL: test_write_zt_i64_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0[1, mul vl], z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> %zn, i32 1)
+ ret void
+}
+
+define void @test_write_zt_f16_2(<vscale x 8 x half> %zn) #0 {
+; CHECK-LABEL: test_write_zt_f16_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0[2, mul vl], z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv8f16(i32 0, <vscale x 8 x half> %zn, i32 2)
+ ret void
+}
+
+define void @test_write_zt_f32_3(<vscale x 4 x float> %zn) #0 {
+; CHECK-LABEL: test_write_zt_f32_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0[3, mul vl], z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> %zn, i32 3)
+ ret void
+}
+
+define void @test_write_zt_f64_1(<vscale x 2 x double> %zn) #0 {
+; CHECK-LABEL: test_write_zt_f64_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0[1, mul vl], z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv2f64(i32 0, <vscale x 2 x double> %zn, i32 1)
+ ret void
+}
+
+define void @test_write_zt_bf16_2(<vscale x 8 x bfloat> %zn) #0 {
+; CHECK-LABEL: test_write_zt_bf16_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0[2, mul vl], z0
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.write.lane.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn, i32 2)
+ ret void
+}
+
+;; ALIAS
+
+define void @test_write_zt_i8(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: test_write_zt_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> %v)
+ ret void
+}
+
+define void @test_write_zt_i16(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: test_write_zt_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> %v)
+ ret void
+}
+
+define void @test_write_zt_i32(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: test_write_zt_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> %v)
+ ret void
+}
+
+define void @test_write_zt_i64(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: test_write_zt_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> %v)
+ ret void
+}
+
+define void @test_write_zt_f16(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: test_write_zt_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> %v)
+ ret void
+}
+
+define void @test_write_zt_bf16(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: test_write_zt_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> %v)
+ ret void
+}
+
+define void @test_write_zt_f32(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: test_write_zt_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.write.zt.nxv4f32(i32 0, <vscale x 4 x float> %v)
+ ret void
+}
+
+define void @test_write_zt_f64(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: test_write_zt_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movt zt0, z0
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.write.zt.nxv2f64(i32 0, <vscale x 2 x double> %v)
+ ret void
+}
+
+attributes #0 = { "target-features"="+sme2,+sme-lutv2" }
>From 865c108cc254c7c8fd4878b73427a0ee2544acec Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 8 Jul 2024 15:49:36 +0000
Subject: [PATCH 2/6] Fix write_zt to out ZT0 and move rename llvm-ir test
---
clang/include/clang/Basic/arm_sme.td | 2 +-
.../acle_sme2_write_lane_zt.c | 72 +++++++++----------
...read-zt.ll => sme2-intrinsics-write-zt.ll} | 0
3 files changed, 37 insertions(+), 37 deletions(-)
rename llvm/test/CodeGen/AArch64/{sme2-intrinsics-read-zt.ll => sme2-intrinsics-write-zt.ll} (100%)
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index ff68e536e99b09..fcddc7e03c1ab3 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -820,6 +820,6 @@ defm SVREADZ_VG4 : ZAReadzArray<"4">;
let SMETargetGuard = "sme2,sme-lutv2" in {
def SVWRITE_LANE_ZT : SInst<"svwrite_lane_zt[_{d}]", "vidi", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_lane_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
- def SVWRITE_ZT : SInst<"svwrite_zt[_{d}]", "vid", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
+ def SVWRITE_ZT : SInst<"svwrite_zt[_{d}]", "vid", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_zt", [IsStreaming, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
}
} // let SVETargetGuard = InvalidMode
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
index 9bdc3481953a21..9b7b32a536b650 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
@@ -209,193 +209,193 @@ void test_write_lane_zt_f64_1(svfloat64_t v) __arm_streaming __arm_inout("zt0")
//ALIAS
// CHECK-LABEL: define dso_local void @test_write_zt_u8(
-// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z16test_write_zt_u8u11__SVUint8_t(
-// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_u8(svuint8_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_u8(svuint8_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _u8)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_s8(
-// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z16test_write_zt_s8u10__SVInt8_t(
-// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_s8(svint8_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_s8(svint8_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _s8)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_u16(
-// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u16u12__SVUint16_t(
-// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_u16(svuint16_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_u16(svuint16_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _u16)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_s16(
-// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s16u11__SVInt16_t(
-// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_s16(svint16_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_s16(svint16_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _s16)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_u32(
-// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u32u12__SVUint32_t(
-// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_u32(svuint32_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_u32(svuint32_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _u32)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_s32(
-// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s32u11__SVInt32_t(
-// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_s32(svint32_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_s32(svint32_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _s32)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_u64(
-// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u64u12__SVUint64_t(
-// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_u64(svuint64_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_u64(svuint64_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _u64)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_s64(
-// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s64u11__SVInt64_t(
-// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_s64(svint64_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_s64(svint64_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _s64)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_f16(
-// CHECK-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_f16u13__SVFloat16_t(
-// CHECK-CXX-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_f16(svfloat16_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_f16(svfloat16_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _f16)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_bf16(
-// CHECK-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z18test_write_zt_bf16u14__SVBfloat16_t(
-// CHECK-CXX-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_bf16(svbfloat16_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_bf16(svbfloat16_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _bf16)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write_zt_f32(
-// CHECK-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_f32u13__SVFloat32_t(
-// CHECK-CXX-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write_zt_f32(svfloat32_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_zt_f32(svfloat32_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _f32)(0, v);
}
// CHECK-LABEL: define dso_local void @test_write__zt_f64(
-// CHECK-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2f64(i32 0, <vscale x 2 x double> [[V]])
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z18test_write__zt_f64u13__SVFloat64_t(
-// CHECK-CXX-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2f64(i32 0, <vscale x 2 x double> [[V]])
// CHECK-CXX-NEXT: ret void
//
-void test_write__zt_f64(svfloat64_t v) __arm_streaming __arm_inout("zt0") {
+void test_write__zt_f64(svfloat64_t v) __arm_streaming __arm_out("zt0") {
SVE_ACLE_FUNC(svwrite_zt, _f64)(0, v);
}
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-read-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll
similarity index 100%
rename from llvm/test/CodeGen/AArch64/sme2-intrinsics-read-zt.ll
rename to llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll
>From 763832c73caa6e5e7c7c6eb2091a987e16abf0dd Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Fri, 26 Jul 2024 15:30:45 +0000
Subject: [PATCH 3/6] Fix index range for write.lane.zt
---
clang/include/clang/Basic/arm_sme.td | 2 +-
.../acle_sme2_write_lane_zt.c | 12 +++---
.../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 3 +-
llvm/lib/Target/AArch64/SMEInstrFormats.td | 41 +++----------------
4 files changed, 15 insertions(+), 43 deletions(-)
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index fcddc7e03c1ab3..5e835a7cdf6650 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -819,7 +819,7 @@ defm SVREADZ_VG2 : ZAReadzArray<"2">;
defm SVREADZ_VG4 : ZAReadzArray<"4">;
let SMETargetGuard = "sme2,sme-lutv2" in {
- def SVWRITE_LANE_ZT : SInst<"svwrite_lane_zt[_{d}]", "vidi", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_lane_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVWRITE_LANE_ZT : SInst<"svwrite_lane_zt[_{d}]", "vidi", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_lane_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck1_3>]>;
def SVWRITE_ZT : SInst<"svwrite_zt[_{d}]", "vid", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_zt", [IsStreaming, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
}
} // let SVETargetGuard = InvalidMode
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
index 9b7b32a536b650..21966ac36b3c2c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
@@ -114,17 +114,17 @@ void test_write_lane_zt_s32_3(svint32_t v) __arm_streaming __arm_inout("zt0") {
// CHECK-LABEL: define dso_local void @test_write_lane_zt_u64_0(
// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 0)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 1)
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_u64_0u12__SVUint64_t(
// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 0)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 1)
// CHECK-CXX-NEXT: ret void
//
void test_write_lane_zt_u64_0(svuint64_t v) __arm_streaming __arm_inout("zt0") {
- SVE_ACLE_FUNC(svwrite_lane_zt, _u64)(0, v, 0);
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u64)(0, v, 1);
}
// CHECK-LABEL: define dso_local void @test_write_lane_zt_s64_1(
@@ -178,17 +178,17 @@ void test_write_lane_zt_bf16_3(svbfloat16_t v) __arm_streaming __arm_inout("zt0"
// CHECK-LABEL: define dso_local void @test_write_lane_zt_f32_0(
// CHECK-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 0)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 2)
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_f32_0u13__SVFloat32_t(
// CHECK-CXX-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 0)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 2)
// CHECK-CXX-NEXT: ret void
//
void test_write_lane_zt_f32_0(svfloat32_t v) __arm_streaming __arm_inout("zt0") {
- SVE_ACLE_FUNC(svwrite_lane_zt, _f32)(0, v, 0);
+ SVE_ACLE_FUNC(svwrite_lane_zt, _f32)(0, v, 2);
}
// CHECK-LABEL: define dso_local void @test_write_lane_zt_f64_1(
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index d37090c6afb1ff..65c06aea099cc8 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -356,5 +356,6 @@ void test_read_zt() __arm_streaming __arm_inout("zt0") {
svwrite_lane_zt(1, svundef_s8(), 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svwrite_zt(1, svundef_s8()); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Check index
- svwrite_lane_zt(0, svundef_s8(), 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+ svwrite_lane_zt(0, svundef_s8(), 0); // expected-error {{argument value 4 is outside the valid range [1, 3]}}
+ svwrite_lane_zt(0, svundef_s8(), 4); // expected-error {{argument value 4 is outside the valid range [1, 3]}}
}
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 782169ae2ef1e2..17f265ae28d8c2 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3287,41 +3287,12 @@ multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc, SDPatternOperator in
def : InstAlias<mnemonic # "\t$ZTt, $Zt",
(!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
- def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv16i8:$zn, sme_elm_idx0_3:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
- def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv8i16:$zn, sme_elm_idx0_3:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
- def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv4i32:$zn, sme_elm_idx0_3:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
- def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv2i64:$zn, sme_elm_idx0_3:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
- def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv8f16:$zn, sme_elm_idx0_3:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
- def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv4f32:$zn, sme_elm_idx0_3:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
- def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv2f64:$zn, sme_elm_idx0_3:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
- def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv8bf16:$zn, sme_elm_idx0_3:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, sme_elm_idx0_3:$imm, $zn)>;
-
- //Alias intrinsic
- def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
- def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv8i16:$zn),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
- def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv4i32:$zn),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
- def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv2i64:$zn),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
- def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv8f16:$zn),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
- def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv4f32:$zn),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
- def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv2f64:$zn),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
- def : Pat<(intrinsic (imm_to_zt untyped:$zt), nxv8bf16:$zn),
- (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
-
+ foreach vt = [nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16] in {
+ def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), vt:$zn, sme_elm_idx0_3:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, $imm, $zn)>;
+ def : Pat<(intrinsic (imm_to_zt untyped:$zt), vt:$zn),
+ (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
+ }
}
//===----------------------------------------------------------------------===//
>From eeb73422ed935158875edfadcce9cc22b5c302cd Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 29 Jul 2024 10:19:19 +0000
Subject: [PATCH 4/6] Fix expected diagnostic in acle_sme2_imm.cpp test
---
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 65c06aea099cc8..893d51d05de39f 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -356,6 +356,6 @@ void test_read_zt() __arm_streaming __arm_inout("zt0") {
svwrite_lane_zt(1, svundef_s8(), 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svwrite_zt(1, svundef_s8()); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Check index
- svwrite_lane_zt(0, svundef_s8(), 0); // expected-error {{argument value 4 is outside the valid range [1, 3]}}
+ svwrite_lane_zt(0, svundef_s8(), 0); // expected-error {{argument value 0 is outside the valid range [1, 3]}}
svwrite_lane_zt(0, svundef_s8(), 4); // expected-error {{argument value 4 is outside the valid range [1, 3]}}
}
>From b50192d0ddc0aab8d7e8aed5a6d0f6b62b0d61af Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 23 Sep 2024 09:44:47 +0000
Subject: [PATCH 5/6] Change function name in Sema from test_read_zt to
test_write_zt
---
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 893d51d05de39f..fc460fb2e9a361 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -351,7 +351,7 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16,
svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}
-void test_read_zt() __arm_streaming __arm_inout("zt0") {
+void test_write_zt() __arm_streaming __arm_inout("zt0") {
// Check Zt tile 0
svwrite_lane_zt(1, svundef_s8(), 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svwrite_zt(1, svundef_s8()); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
>From b1d7b8d0135c1b47d20697f5108a6e1209204e02 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Mon, 23 Sep 2024 17:07:19 +0100
Subject: [PATCH 6/6] Update acle_sme2_write_lane_zt.c test names
---
.../aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
index 21966ac36b3c2c..c820aafeec9789 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
@@ -59,7 +59,7 @@ void test_write_lane_zt_s8_2(svint8_t v) __arm_streaming __arm_inout("zt0") {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
// CHECK-CXX-NEXT: ret void
//
-void test_write_lane_zt_u16_3(svuint16_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_lane_zt_u16_1(svuint16_t v) __arm_streaming __arm_inout("zt0") {
SVE_ACLE_FUNC(svwrite_lane_zt, _u16)(0, v, 1);
}
@@ -123,7 +123,7 @@ void test_write_lane_zt_s32_3(svint32_t v) __arm_streaming __arm_inout("zt0") {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 1)
// CHECK-CXX-NEXT: ret void
//
-void test_write_lane_zt_u64_0(svuint64_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_lane_zt_u64_1(svuint64_t v) __arm_streaming __arm_inout("zt0") {
SVE_ACLE_FUNC(svwrite_lane_zt, _u64)(0, v, 1);
}
@@ -187,7 +187,7 @@ void test_write_lane_zt_bf16_3(svbfloat16_t v) __arm_streaming __arm_inout("zt0"
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 2)
// CHECK-CXX-NEXT: ret void
//
-void test_write_lane_zt_f32_0(svfloat32_t v) __arm_streaming __arm_inout("zt0") {
+void test_write_lane_zt_f32_2(svfloat32_t v) __arm_streaming __arm_inout("zt0") {
SVE_ACLE_FUNC(svwrite_lane_zt, _f32)(0, v, 2);
}
More information about the llvm-commits
mailing list