[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)

via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 12 03:59:08 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-ir

Author: None (CarolineConcatto)

<details>
<summary>Changes</summary>

… single

According to the specification in
ARM-software/acle#<!-- -->309 this adds the intrinsics

// And similarly for u8.
svint8_t svreadz_hor_za8_s8(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// And similarly for u16, bf16 and f16.
svint16_t svreadz_hor_za16_s16(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// And similarly for u32 and f32.
svint32_t svreadz_hor_za32_s32(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// And similarly for u64 and f64.
svint64_t svreadz_hor_za64_s64(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64 svint8_t svreadz_hor_za128_s8(uint64_t tile, uint32_t slice) __arm_streaming __arm_inout("za");

---

Patch is 57.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88499.diff


9 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+18) 
- (added) clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c (+417) 
- (added) clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp (+21) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+13-1) 
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+37) 
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+3) 
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+2-1) 
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+65-1) 
- (added) llvm/test/CodeGen/AArch64/sme2p1-intrinsics-movaz.ll (+445) 


``````````diff
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..10aa0d1709a74c 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,21 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+multiclass ZAReadz<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> {
+  let TargetGuard = "sme2p1" in {
+    def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}", "dim", t,
+                          MergeNone, i_prefix # "_horiz",
+                          [IsStreaming, IsInOutZA], ch>;
+
+    def NAME # _V : SInst<"svreadz_ver_" # n_suffix # "_{d}", "dim", t,
+                          MergeNone, i_prefix # "_vert",
+                          [IsStreaming, IsInOutZA], ch>;
+  }
+}
+
+defm SVREADZ_ZA8 : ZAReadz<"za8", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVREADZ_ZA16 : ZAReadz<"za16", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
+defm SVREADZ_ZA32 : ZAReadz<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
+defm SVREADZ_ZA64 : ZAReadz<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
+defm SVREADZ_ZA128 : ZAReadz<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c
new file mode 100644
index 00000000000000..a0b5a882d53b21
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c
@@ -0,0 +1,417 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme.h>
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svreadz_hor_za8_s8(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z23test_svreadz_hor_za8_s8j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svreadz_hor_za8_s8(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za8_s8(0, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svreadz_hor_za8_u8(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z23test_svreadz_hor_za8_u8j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svreadz_hor_za8_u8(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za8_u8(0, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svreadz_hor_za16_s16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 0, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z25test_svreadz_hor_za16_s16j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 0, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svreadz_hor_za16_s16(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za16_s16(0, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svreadz_hor_za16_u16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 1, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z25test_svreadz_hor_za16_u16j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 1, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svreadz_hor_za16_u16(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za16_u16(1, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svreadz_hor_za16_f16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readz.horiz.nxv8f16(i32 0, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z25test_svreadz_hor_za16_f16j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readz.horiz.nxv8f16(i32 0, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svreadz_hor_za16_f16(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za16_f16(0, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svreadz_hor_za16_bf16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32 1, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z26test_svreadz_hor_za16_bf16j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32 1, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svreadz_hor_za16_bf16(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za16_bf16(1, slice);
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svreadz_hor_za32_s32(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 0, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z25test_svreadz_hor_za32_s32j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 0, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svreadz_hor_za32_s32(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za32_s32(0, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svreadz_hor_za32_u32(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 2, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z25test_svreadz_hor_za32_u32j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 2, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svreadz_hor_za32_u32(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za32_u32(2, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svreadz_hor_za32_f32(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readz.horiz.nxv4f32(i32 3, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z25test_svreadz_hor_za32_f32j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readz.horiz.nxv4f32(i32 3, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svreadz_hor_za32_f32(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za32_f32(3, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svreadz_hor_za64_s64(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 0, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z25test_svreadz_hor_za64_s64j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 0, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svreadz_hor_za64_s64(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za64_s64(0, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svreadz_hor_za64_u64(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 4, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z25test_svreadz_hor_za64_u64j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 4, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svreadz_hor_za64_u64(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za64_u64(4, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svreadz_hor_za64_f64(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readz.horiz.nxv2f64(i32 7, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z25test_svreadz_hor_za64_f64j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readz.horiz.nxv2f64(i32 7, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svreadz_hor_za64_f64(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za64_f64(7, slice);
+}
+
+// ZA128
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svreadz_hor_za128_s8(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 0, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svreadz_hor_za128_s8j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 0, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svreadz_hor_za128_s8(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za128_s8(0, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svreadz_hor_za128_u8(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 1, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svreadz_hor_za128_u8j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 1, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svreadz_hor_za128_u8(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za128_u8(1, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svreadz_hor_za128_s16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 2, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z26test_svreadz_hor_za128_s16j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 2, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svreadz_hor_za128_s16(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za128_s16(2, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svreadz_hor_za128_u16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 3, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z26test_svreadz_hor_za128_u16j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 3, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svreadz_hor_za128_u16(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za128_u16(3, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svreadz_hor_za128_f16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32 4, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z26test_svreadz_hor_za128_f16j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32 4, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svreadz_hor_za128_f16(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za128_f16(4, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svreadz_hor_za128_bf16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32 5, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z27test_svreadz_hor_za128_bf16j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32 5, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svreadz_hor_za128_bf16(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za128_bf16(5, slice);
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svreadz_hor_za128_s32(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 6, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z26test_svreadz_hor_za128_s32j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 6, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svreadz_hor_za128_s32(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za128_s32(6, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svreadz_hor_za128_u32(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 7, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z26test_svreadz_hor_za128_u32j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 7, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svreadz_hor_za128_u32(uint32_t slice) __arm_streaming __arm_inout("za")
+{
+   return svreadz_hor_za128_u32(7, slice);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svreadz_hor_za128_f32(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32 8, i32 [[SLICE]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z26test_svreadz_hor_za128_f32j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32 8, i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svreadz_hor_za...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/88499


More information about the cfe-commits mailing list