[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

Wed Apr 17 05:52:03 PDT 2024

https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/88114

>From c44bd42f8011dd09771fda50a76a7321342c2b2f Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 4 Apr 2024 14:36:54 +0000
Subject: [PATCH 1/4] WIP

---
 clang/include/clang/Basic/arm_sme.td          |  29 ++-
 .../acle_sme2p1_zero.c                        |  91 +++++++++
 .../acle_sme2p1_imm.cpp                       | 190 ++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |   6 +
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
 create mode 100644 clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..b00eabe331169f 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -146,6 +146,33 @@ let TargetGuard = "sme" in {
                              [IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
 }
 
+let TargetGuard = "sme2p1" in {
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg1x2",
+                            [IsOverloadNone, IsStreaming, IsInOutZA],
+                            [ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg1x4",
+                            [IsOverloadNone, IsStreaming, IsInOutZA],
+                            [ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg2x1",
+                            [IsOverloadNone, IsStreaming, IsInOutZA],
+                            [ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg2x2",
+                            [IsOverloadNone, IsStreaming, IsInOutZA],
+                            [ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg2x4",
+                            [IsOverloadNone, IsStreaming, IsInOutZA],
+                            [ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg4x1",
+                            [IsOverloadNone, IsStreaming, IsInOutZA],
+                            [ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg4x2",
+                            [IsOverloadNone, IsStreaming, IsInOutZA],
+                            [ImmCheck<0, ImmCheck0_1>]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg4x4",
+                            [IsOverloadNone, IsStreaming, IsInOutZA],
+                            [ImmCheck<0, ImmCheck0_1>]>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // SME - Counting elements in a streaming vector
 
@@ -673,4 +700,4 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
new file mode 100644
index 00000000000000..3b661ec425cda5
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -0,0 +1,91 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sme.h>
+
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg1x2(void) __arm_streaming __arm_inout("za")
+{
+   SVE_ACLE_FUNC(svzero_za64,_vg1x2)(0);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 7)
+// CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg1x4(void) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg1x4)(7);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x1(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 7)
+// CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg2x1(void) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg2x1)(7);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg2x2(void) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg2x2)(3);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x4(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg2x4(void)  __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg2x4)(0);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x1(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg4x1(void) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg4x1)(3);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg4x2(void) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg4x2)(1);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x4(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg4x4(void) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg4x4)(0);
+}
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CPP-CHECK: {{.*}}
diff --git a/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp
new file mode 100644
index 00000000000000..783cb05a28ea28
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp
@@ -0,0 +1,190 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -fsyntax-only -verify %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme.h>
+void test_svpext_lane_imm_0_3(svcount_t c) {
+  svpext_lane_c8(c, -1);  // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svpext_lane_c16(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svpext_lane_c32(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svpext_lane_c64(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+
+  svpext_lane_c8(c, 4);  // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svpext_lane_c16(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svpext_lane_c32(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svpext_lane_c64(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+}
+
+void test_svpext_lane_x2_imm_0_1(svcount_t c) {
+  svpext_lane_c8_x2(c, -1);  // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svpext_lane_c16_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svpext_lane_c32_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svpext_lane_c64_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+
+  svpext_lane_c8_x2(c, 2);  // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  svpext_lane_c16_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  svpext_lane_c32_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  svpext_lane_c64_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+}
+
+svcount_t test_svwhile_pn_signed(int64_t op1, int64_t op2) {
+  svwhilege_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilege_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilege_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilege_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilegt_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilegt_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilegt_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilegt_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilele_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilele_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilele_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilele_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilelt_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilelt_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilelt_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilelt_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+
+  svwhilege_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
+  svwhilege_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilege_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilege_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilegt_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
+  svwhilegt_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilegt_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilegt_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilele_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
+  svwhilele_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilele_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilele_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilelt_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
+  svwhilelt_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilelt_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilelt_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+}
+
+svcount_t test_svwhile_pn_unsigned(uint64_t op1, uint64_t op2) {
+  svwhilege_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilege_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilege_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilege_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilegt_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilegt_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilegt_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilegt_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilele_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilele_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilele_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilele_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilelt_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilelt_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilelt_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+  svwhilelt_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
+
+  svwhilege_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
+  svwhilege_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilege_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilege_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilegt_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
+  svwhilegt_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilegt_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilegt_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilele_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
+  svwhilele_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilele_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilele_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilelt_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
+  svwhilelt_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilelt_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+  svwhilelt_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
+}
+
+void test_cntp(svcount_t c) {
+  svcntp_c8(c, 1);  // expected-error {{argument value 1 is outside the valid range [2, 4]}}
+  svcntp_c16(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}}
+  svcntp_c32(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}}
+  svcntp_c64(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}}
+
+  svcntp_c8(c, 3);  // expected-error {{argument should be a multiple of 2}}
+  svcntp_c16(c, 3); // expected-error {{argument should be a multiple of 2}}
+  svcntp_c32(c, 3); // expected-error {{argument should be a multiple of 2}}
+  svcntp_c64(c, 3); // expected-error {{argument should be a multiple of 2}}
+}
+
+
+void test_svdot_lane_2way(svint32_t s32, svuint32_t u32, svint16_t s16, svuint16_t u16,
+                          svfloat32_t f32, svfloat16_t f16) {
+  svdot_lane_s32_s16(s32, s16, s16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_u32_u16(u32, u16, u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_f32_f16(f32, f16, f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+}
+
+
+__attribute__((target("+sve2p1+b16b16")))
+void test_svbfml_lane(svbfloat16_t zda, svbfloat16_t zn, svbfloat16_t zm, uint64_t idx){
+  svmla_lane_bf16(zda, zn, zm, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+  svmla_lane_bf16(zda, zn, zm, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  svmls_lane_bf16(zda, zn, zm, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+  svmls_lane_bf16(zda, zn, zm, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  svmla_lane_bf16(zda, zn, zm, idx); // expected-error {{argument to 'svmla_lane_bf16' must be a constant integer}}
+  svmls_lane_bf16(zda, zn, zm, idx);  // expected-error {{argument to 'svmls_lane_bf16' must be a constant integer}}
+}
+
+__attribute__((target("+sve2p1+b16b16")))
+void test_svbfmul_lane(svbfloat16_t zn, svbfloat16_t zm, uint64_t idx){
+  svmul_lane_bf16(zn, zm, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+  svmul_lane_bf16(zn, zm, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  svmul_lane_bf16(zn, zm, idx);  // expected-error {{argument to 'svmul_lane_bf16' must be a constant integer}}
+}
+
+__attribute__((target("+sve2p1")))
+void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, svfloat16_t zm_f16){
+  svextq_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+  svextq_f16(zn_f16, zm_f16, 16);  // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+}
+
+__attribute__((target("+sve2p1")))
+void test_svpmov_lane(){
+  svuint8_t zn_u8;
+  svuint16_t zn_u16;
+  svuint32_t zn_u32;
+  svuint64_t zn_u64;
+  svbool_t pn;
+
+  svpmov_lane_u8(zn_u8, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+  svpmov_lane_u16(zn_u16, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svpmov_lane_u32(zn_u32, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svpmov_lane_u64(zn_u64, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+
+  svpmov_lane_u8(zn_u8, 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+  svpmov_lane_u16(zn_u16, 3); // expected-error {{argument value 3 is outside the valid range [0, 1]}}
+  svpmov_lane_u32(zn_u32, 5); // expected-error {{argument value 5 is outside the valid range [0, 3]}}
+  svpmov_lane_u64(zn_u64, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+
+
+  zn_u16 = svpmov_lane_u16_m(zn_u16, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 1]}}
+  zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 3]}}
+  zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 7]}}
+
+  zn_u16 = svpmov_lane_u16_m(zn_u16, pn, 3); // expected-error {{argument value 3 is outside the valid range [1, 1]}}
+  zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 5); // expected-error {{argument value 5 is outside the valid range [1, 3]}}
+  zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 8); // expected-error {{argument value 8 is outside the valid range [1, 7]}}
+}
+
+__attribute__((target("+sve2p1")))
+void test_svget_svset_b(uint64_t idx, svboolx2_t tuple2, svboolx4_t tuple4, svbool_t res){
+  svset2_b(tuple2, -1, res); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svset2_b(tuple2, 2,  res); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  svset4_b(tuple4, -1, res); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svset4_b(tuple4, 4,  res); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  svget2_b(tuple2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svget2_b(tuple2,  2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  svget4_b(tuple4, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svget4_b(tuple4,  4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  svset2_b(tuple2, idx, res); // expected-error {{argument to 'svset2_b' must be a constant integer}}
+  svset4_b(tuple4, idx, res); // expected-error {{argument to 'svset4_b' must be a constant integer}}
+  svget2_b(tuple2, idx); // expected-error {{argument to 'svget2_b' must be a constant integer}}
+  svget4_b(tuple4, idx); // expected-error {{argument to 'svget4_b' must be a constant integer}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index bcaa37de74b630..32f60cf4dedb46 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3354,6 +3354,12 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic;
   def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic;
 
+  // Multi-vector zeroing
+
+  foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in {
+    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty],  [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+  }
+  
   // Multi-vector signed saturating doubling multiply high
 
   def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;

>From 1ed48b32979ac4ca07fa49352e73274a039eded5 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 9 Apr 2024 12:11:33 +0000
Subject: [PATCH 2/4] [AArch64][SME] Add intrinsics for vector groups ZERO

---
 clang/include/clang/Basic/arm_sme.td          |  40 ++--
 .../acle_sme2p1_zero.c                        | 116 +++++++----
 .../acle_sme2p1_imm.cpp                       | 190 ------------------
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |   2 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td    |  46 ++++-
 .../CodeGen/AArch64/sme2p1-intrinsics-zero.ll |  94 +++++++++
 6 files changed, 230 insertions(+), 258 deletions(-)
 delete mode 100644 clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp
 create mode 100644 llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index b00eabe331169f..9bcfbf8c4f5c5e 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -147,30 +147,22 @@ let TargetGuard = "sme" in {
 }
 
 let TargetGuard = "sme2p1" in {
-  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg1x2",
-                            [IsOverloadNone, IsStreaming, IsInOutZA],
-                            [ImmCheck<0, ImmCheck0_7>]>;
-  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg1x4",
-                            [IsOverloadNone, IsStreaming, IsInOutZA],
-                            [ImmCheck<0, ImmCheck0_7>]>;
-  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg2x1",
-                            [IsOverloadNone, IsStreaming, IsInOutZA],
-                            [ImmCheck<0, ImmCheck0_7>]>;
-  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg2x2",
-                            [IsOverloadNone, IsStreaming, IsInOutZA],
-                            [ImmCheck<0, ImmCheck0_3>]>;
-  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg2x4",
-                            [IsOverloadNone, IsStreaming, IsInOutZA],
-                            [ImmCheck<0, ImmCheck0_3>]>;
-  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg4x1",
-                            [IsOverloadNone, IsStreaming, IsInOutZA],
-                            [ImmCheck<0, ImmCheck0_3>]>;
-  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg4x2",
-                            [IsOverloadNone, IsStreaming, IsInOutZA],
-                            [ImmCheck<0, ImmCheck0_1>]>;
-  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vi", "", MergeNone, "aarch64_sme_zero_za64_vg4x4",
-                            [IsOverloadNone, IsStreaming, IsInOutZA],
-                            [ImmCheck<0, ImmCheck0_1>]>;
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg1x2",
+                            [IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg1x4",
+                            [IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x1",
+                            [IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x2",
+                            [IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x4",
+                            [IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x1",
+                            [IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x2",
+                            [IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x4",
+                            [IsOverloadNone, IsStreaming, IsInOutZA]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
index 3b661ec425cda5..bdd75798554148 100644
--- a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -3,89 +3,137 @@
 
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sme.h>
 
 #define SVE_ACLE_FUNC(A1,A2) A1##A2
 
 // CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 0)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 [[SLICE]])
 // CHECK-NEXT:    ret void
 //
-void test_svzero_za64_vg1x2(void) __arm_streaming __arm_inout("za")
+// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg1x2j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg1x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
-   SVE_ACLE_FUNC(svzero_za64,_vg1x2)(0);
+   SVE_ACLE_FUNC(svzero_za64,_vg1x2)(slice);
 }
 
 // CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4(
-// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 7)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 [[SLICE]])
 // CHECK-NEXT:    ret void
 //
-void test_svzero_za64_vg1x4(void) __arm_streaming __arm_inout("za"){
-   SVE_ACLE_FUNC(svzero_za64,_vg1x4)(7);
+// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg1x4j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg1x4(uint32_t slice) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg1x4)(slice);
 }
 
 // CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x1(
-// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 7)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 [[SLICE]])
 // CHECK-NEXT:    ret void
 //
-void test_svzero_za64_vg2x1(void) __arm_streaming __arm_inout("za"){
-   SVE_ACLE_FUNC(svzero_za64,_vg2x1)(7);
+// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x1j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg2x1(uint32_t slice) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg2x1)(slice);
 }
 
 // CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x2(
-// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 3)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 [[SLICE]])
 // CHECK-NEXT:    ret void
 //
-void test_svzero_za64_vg2x2(void) __arm_streaming __arm_inout("za"){
-   SVE_ACLE_FUNC(svzero_za64,_vg2x2)(3);
+// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x2j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg2x2(uint32_t slice) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg2x2)(slice);
 }
 
 // CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x4(
-// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 0)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 [[SLICE]])
 // CHECK-NEXT:    ret void
 //
-void test_svzero_za64_vg2x4(void)  __arm_streaming __arm_inout("za"){
-   SVE_ACLE_FUNC(svzero_za64,_vg2x4)(0);
+// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x4j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg2x4(uint32_t slice)  __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg2x4)(slice);
 }
 
 // CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x1(
-// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 3)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 [[SLICE]])
 // CHECK-NEXT:    ret void
 //
-void test_svzero_za64_vg4x1(void) __arm_streaming __arm_inout("za"){
-   SVE_ACLE_FUNC(svzero_za64,_vg4x1)(3);
+// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x1j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg4x1(uint32_t slice) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg4x1)(slice);
 }
 
 // CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x2(
-// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 1)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 [[SLICE]])
 // CHECK-NEXT:    ret void
 //
-void test_svzero_za64_vg4x2(void) __arm_streaming __arm_inout("za"){
-   SVE_ACLE_FUNC(svzero_za64,_vg4x2)(1);
+// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x2j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg4x2(uint32_t slice) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg4x2)(slice);
 }
 
 // CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x4(
-// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 0)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 [[SLICE]])
 // CHECK-NEXT:    ret void
 //
-void test_svzero_za64_vg4x4(void) __arm_streaming __arm_inout("za"){
-   SVE_ACLE_FUNC(svzero_za64,_vg4x4)(0);
+// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x4j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 [[SLICE]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_za64_vg4x4(uint32_t slice) __arm_streaming __arm_inout("za"){
+   SVE_ACLE_FUNC(svzero_za64,_vg4x4)(slice);
 }
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// CPP-CHECK: {{.*}}
diff --git a/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp
deleted file mode 100644
index 783cb05a28ea28..00000000000000
--- a/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -fsyntax-only -verify %s
-
-// REQUIRES: aarch64-registered-target
-
-#include <arm_sme.h>
-void test_svpext_lane_imm_0_3(svcount_t c) {
-  svpext_lane_c8(c, -1);  // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
-  svpext_lane_c16(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
-  svpext_lane_c32(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
-  svpext_lane_c64(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
-
-  svpext_lane_c8(c, 4);  // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  svpext_lane_c16(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  svpext_lane_c32(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  svpext_lane_c64(c, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-}
-
-void test_svpext_lane_x2_imm_0_1(svcount_t c) {
-  svpext_lane_c8_x2(c, -1);  // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
-  svpext_lane_c16_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
-  svpext_lane_c32_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
-  svpext_lane_c64_x2(c, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
-
-  svpext_lane_c8_x2(c, 2);  // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  svpext_lane_c16_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  svpext_lane_c32_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  svpext_lane_c64_x2(c, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-}
-
-svcount_t test_svwhile_pn_signed(int64_t op1, int64_t op2) {
-  svwhilege_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilege_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilege_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilege_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilegt_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilegt_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilegt_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilegt_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilele_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilele_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilele_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilele_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilelt_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilelt_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilelt_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilelt_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-
-  svwhilege_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
-  svwhilege_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilege_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilege_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilegt_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
-  svwhilegt_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilegt_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilegt_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilele_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
-  svwhilele_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilele_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilele_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilelt_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
-  svwhilelt_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilelt_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilelt_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-}
-
-svcount_t test_svwhile_pn_unsigned(uint64_t op1, uint64_t op2) {
-  svwhilege_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilege_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilege_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilege_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilegt_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilegt_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilegt_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilegt_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilele_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilele_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilele_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilele_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilelt_c8(op1, op2, 6);  // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilelt_c16(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilelt_c32(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-  svwhilelt_c64(op1, op2, 6); // expected-error {{argument value 6 is outside the valid range [2, 4]}}
-
-  svwhilege_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
-  svwhilege_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilege_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilege_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilegt_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
-  svwhilegt_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilegt_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilegt_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilele_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
-  svwhilele_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilele_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilele_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilelt_c8(op1, op2, 3);  // expected-error {{argument should be a multiple of 2}}
-  svwhilelt_c16(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilelt_c32(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-  svwhilelt_c64(op1, op2, 3); // expected-error {{argument should be a multiple of 2}}
-}
-
-void test_cntp(svcount_t c) {
-  svcntp_c8(c, 1);  // expected-error {{argument value 1 is outside the valid range [2, 4]}}
-  svcntp_c16(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}}
-  svcntp_c32(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}}
-  svcntp_c64(c, 1); // expected-error {{argument value 1 is outside the valid range [2, 4]}}
-
-  svcntp_c8(c, 3);  // expected-error {{argument should be a multiple of 2}}
-  svcntp_c16(c, 3); // expected-error {{argument should be a multiple of 2}}
-  svcntp_c32(c, 3); // expected-error {{argument should be a multiple of 2}}
-  svcntp_c64(c, 3); // expected-error {{argument should be a multiple of 2}}
-}
-
-
-void test_svdot_lane_2way(svint32_t s32, svuint32_t u32, svint16_t s16, svuint16_t u16,
-                          svfloat32_t f32, svfloat16_t f16) {
-  svdot_lane_s32_s16(s32, s16, s16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  svdot_lane_u32_u16(u32, u16, u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  svdot_lane_f32_f16(f32, f16, f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-}
-
-
-__attribute__((target("+sve2p1+b16b16")))
-void test_svbfml_lane(svbfloat16_t zda, svbfloat16_t zn, svbfloat16_t zm, uint64_t idx){
-  svmla_lane_bf16(zda, zn, zm, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
-  svmla_lane_bf16(zda, zn, zm, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  svmls_lane_bf16(zda, zn, zm, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
-  svmls_lane_bf16(zda, zn, zm, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  svmla_lane_bf16(zda, zn, zm, idx); // expected-error {{argument to 'svmla_lane_bf16' must be a constant integer}}
-  svmls_lane_bf16(zda, zn, zm, idx);  // expected-error {{argument to 'svmls_lane_bf16' must be a constant integer}}
-}
-
-__attribute__((target("+sve2p1+b16b16")))
-void test_svbfmul_lane(svbfloat16_t zn, svbfloat16_t zm, uint64_t idx){
-  svmul_lane_bf16(zn, zm, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
-  svmul_lane_bf16(zn, zm, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  svmul_lane_bf16(zn, zm, idx);  // expected-error {{argument to 'svmul_lane_bf16' must be a constant integer}}
-}
-
-__attribute__((target("+sve2p1")))
-void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, svfloat16_t zm_f16){
-  svextq_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
-  svextq_f16(zn_f16, zm_f16, 16);  // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-}
-
-__attribute__((target("+sve2p1")))
-void test_svpmov_lane(){
-  svuint8_t zn_u8;
-  svuint16_t zn_u16;
-  svuint32_t zn_u32;
-  svuint64_t zn_u64;
-  svbool_t pn;
-
-  svpmov_lane_u8(zn_u8, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
-  svpmov_lane_u16(zn_u16, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
-  svpmov_lane_u32(zn_u32, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
-  svpmov_lane_u64(zn_u64, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
-
-  svpmov_lane_u8(zn_u8, 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
-  svpmov_lane_u16(zn_u16, 3); // expected-error {{argument value 3 is outside the valid range [0, 1]}}
-  svpmov_lane_u32(zn_u32, 5); // expected-error {{argument value 5 is outside the valid range [0, 3]}}
-  svpmov_lane_u64(zn_u64, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-
-
-  zn_u16 = svpmov_lane_u16_m(zn_u16, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 1]}}
-  zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 3]}}
-  zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 7]}}
-
-  zn_u16 = svpmov_lane_u16_m(zn_u16, pn, 3); // expected-error {{argument value 3 is outside the valid range [1, 1]}}
-  zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 5); // expected-error {{argument value 5 is outside the valid range [1, 3]}}
-  zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 8); // expected-error {{argument value 8 is outside the valid range [1, 7]}}
-}
-
-__attribute__((target("+sve2p1")))
-void test_svget_svset_b(uint64_t idx, svboolx2_t tuple2, svboolx4_t tuple4, svbool_t res){
-  svset2_b(tuple2, -1, res); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
-  svset2_b(tuple2, 2,  res); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  svset4_b(tuple4, -1, res); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
-  svset4_b(tuple4, 4,  res); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-
-  svget2_b(tuple2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
-  svget2_b(tuple2,  2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  svget4_b(tuple4, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
-  svget4_b(tuple4,  4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-
-  svset2_b(tuple2, idx, res); // expected-error {{argument to 'svset2_b' must be a constant integer}}
-  svset4_b(tuple4, idx, res); // expected-error {{argument to 'svset4_b' must be a constant integer}}
-  svget2_b(tuple2, idx); // expected-error {{argument to 'svget2_b' must be a constant integer}}
-  svget4_b(tuple4, idx); // expected-error {{argument to 'svget4_b' must be a constant integer}}
-}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 32f60cf4dedb46..b05fd202cf97fb 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3357,7 +3357,7 @@ let TargetPrefix = "aarch64" in {
   // Multi-vector zeroing
 
   foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in {
-    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty],  [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty],  [IntrNoMem, IntrHasSideEffects]>;
   }
   
   // Multi-vector signed saturating doubling multiply high
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 44d9a8ac7cb677..5bd74951cb7d0f 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, Re
   let usesCustomInserter = 1;
 }
 
+class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum za_flag>
+    : SMEPseudo2Instr<name, 0>,
+      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm), []> {
+  let SMEMatrixType = za_flag;
+  let usesCustomInserter = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // SME pattern match helpers.
 //===----------------------------------------------------------------------===//
@@ -189,6 +196,9 @@ class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand
     : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
           (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
 
+class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
+    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
+    (!cast<Instruction>(name) $base, $offset)>; 
 //===----------------------------------------------------------------------===//
 // SME pattern match helpers.
 //===----------------------------------------------------------------------===//
@@ -4774,39 +4784,57 @@ class sme2p1_zero_matrix<bits<6> opc, Operand index_ty, string mnemonic,
 }
 
 multiclass sme2p1_zero_matrix<string mnemonic> {
-  def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2"> {
+  def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_Z , 1> {
     bits<3> imm;
     let Inst{2-0} = imm;
   }
-  def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic> {
+  def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _2Z, 1> {
     bits<3> imm;
     let Inst{2-0} = imm;
   }
-  def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2"> {
+  def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_2Z, 1> {
     bits<2> imm;
     let Inst{1-0} = imm;
   }
-  def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4"> {
+  def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_2Z, 1> {
     bits<2> imm;
     let Inst{1-0} = imm;
   }
-  def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4"> {
+  def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_Z, 1> {
     bits<3> imm;
     let Inst{2-0} = imm;
   }
-  def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic> {
+  def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>, SMEPseudo2Instr<NAME # _4Z, 1> {
     bits<2> imm;
     let Inst{1-0} = imm;
   }
-  def _VG2_4Z :sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2"> {
+  def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_4Z, 1> {
     bits<1> imm;
     let Inst{0}   = imm;
   }
-  def _VG4_4Z :sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4"> {
+  def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_4Z, 1> {
     bits<1> imm;
     let Inst{0}   = imm;
   }
-}
+
+  def NAME # _VG2_Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_Z, sme_elm_idx0_7, SMEMatrixArray>;
+  def NAME # _VG4_Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_Z, sme_elm_idx0_7, SMEMatrixArray>;
+  def NAME # _2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _2Z, uimm2s2range, SMEMatrixArray>;
+  def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_2Z, uimm1s2range, SMEMatrixArray>;
+  def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_2Z, uimm1s2range, SMEMatrixArray>;
+  def NAME # _4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _4Z, uimm1s4range, SMEMatrixArray>;
+  def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_4Z, uimm0s4range, SMEMatrixArray>;
+  def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_4Z, uimm0s4range, SMEMatrixArray>;
+
+  def : SME2_Zero_Matrix_Pat<NAME # _VG2_Z_PSEUDO, int_aarch64_sme_zero_za64_vg1x2, sme_elm_idx0_7, tileslice16>;
+  def : SME2_Zero_Matrix_Pat<NAME # _VG4_Z_PSEUDO, int_aarch64_sme_zero_za64_vg1x4, sme_elm_idx0_7, tileslice16>;
+  def : SME2_Zero_Matrix_Pat<NAME # _2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x1, uimm2s2range, tileslicerange2s2>;
+  def : SME2_Zero_Matrix_Pat<NAME # _VG2_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x2, uimm1s2range, tileslicerange1s2>;
+  def : SME2_Zero_Matrix_Pat<NAME # _VG4_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x4, uimm1s2range, tileslicerange1s2>;
+  def : SME2_Zero_Matrix_Pat<NAME # _4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x1, uimm1s4range, tileslicerange1s4>;
+  def : SME2_Zero_Matrix_Pat<NAME # _VG2_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x2, uimm0s4range, tileslicerange0s4>;
+  def : SME2_Zero_Matrix_Pat<NAME # _VG4_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x4, uimm0s4range, tileslicerange0s4>;
+} 
 
 //===----------------------------------------------------------------------===//
 // SME2.1 lookup table expand two non-contiguous registers
diff --git a/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll
new file mode 100644
index 00000000000000..81425f33e494e3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define  void @test_svzero_za64_vg1x2(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg1x2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0, vgx2]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 %slice)
+  ret void
+}
+
+define  void @test_svzero_za64_vg1x4(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg1x4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0, vgx4]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 %slice)
+  ret void
+}
+
+define  void @test_svzero_za64_vg2x1(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg2x1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0:1]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 %slice)
+  ret void
+}
+
+define  void @test_svzero_za64_vg2x2(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg2x2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0:1, vgx2]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 %slice)
+  ret void
+}
+
+define  void @test_svzero_za64_vg2x4(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg2x4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0:1, vgx4]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 %slice)
+  ret void
+}
+
+define  void @test_svzero_za64_vg4x1(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg4x1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0:3]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 %slice)
+  ret void
+}
+
+define  void @test_svzero_za64_vg4x2(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg4x2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0:3, vgx2]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 %slice)
+  ret void
+}
+
+define  void @test_svzero_za64_vg4x4(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg4x4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0:3, vgx4]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 %slice)
+  ret void
+}
+
+attributes #0 = { nounwind "target-features" = "+sme2p1"}

>From 260f6b602303e680a3133e495612f1660fb92c0e Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 17 Apr 2024 12:34:28 +0000
Subject: [PATCH 3/4] Added tests for slicec created from imm addition

---
 .../CodeGen/AArch64/sme2p1-intrinsics-zero.ll | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll
index 81425f33e494e3..1df849f7feda30 100644
--- a/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll
+++ b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll
@@ -14,6 +14,18 @@ entry:
   ret void
 }
 
+define  void @test_svzero_za64_vg1x2_offset(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg1x2_offset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 7, vgx2]
+; CHECK-NEXT:    ret
+entry:
+  %slice.max = add i32 %slice, 7
+  tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 %slice.max)
+  ret void
+}
+
 define  void @test_svzero_za64_vg1x4(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg1x4:
 ; CHECK:       // %bb.0: // %entry
@@ -25,6 +37,18 @@ entry:
   ret void
 }
 
+define  void @test_svzero_za64_vg1x4_offset(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg1x4_offset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0, vgx4]
+; CHECK-NEXT:    ret
+entry:
+  %slice.min = add i32 %slice, 0
+  tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 %slice.min)
+  ret void
+}
+
 define  void @test_svzero_za64_vg2x1(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg2x1:
 ; CHECK:       // %bb.0: // %entry
@@ -36,6 +60,18 @@ entry:
   ret void
 }
 
+define  void @test_svzero_za64_vg2x1_offset(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg2x1_offset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add w8, w0, #7
+; CHECK-NEXT:    zero za.d[w8, 0:1]
+; CHECK-NEXT:    ret
+entry:
+  %slice.max = add i32 %slice, 7
+  tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 %slice.max)
+  ret void
+}
+
 define  void @test_svzero_za64_vg2x2(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg2x2:
 ; CHECK:       // %bb.0: // %entry
@@ -47,6 +83,18 @@ entry:
   ret void
 }
 
+define  void @test_svzero_za64_vg2x2_offset(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg2x2_offset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add w8, w0, #3
+; CHECK-NEXT:    zero za.d[w8, 0:1, vgx2]
+; CHECK-NEXT:    ret
+entry:
+  %slice.max = add i32 %slice, 3
+  tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 %slice.max)
+  ret void
+}
+
 define  void @test_svzero_za64_vg2x4(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg2x4:
 ; CHECK:       // %bb.0: // %entry
@@ -58,6 +106,18 @@ entry:
   ret void
 }
 
+define  void @test_svzero_za64_vg2x4_offset(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg2x4_offset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0:1, vgx4]
+; CHECK-NEXT:    ret
+entry:
+  %slice.min = add i32 %slice, 0
+  tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 %slice.min)
+  ret void
+}
+
 define  void @test_svzero_za64_vg4x1(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg4x1:
 ; CHECK:       // %bb.0: // %entry
@@ -69,6 +129,18 @@ entry:
   ret void
 }
 
+define  void @test_svzero_za64_vg4x1_offset(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg4x1_offset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add w8, w0, #3
+; CHECK-NEXT:    zero za.d[w8, 0:3]
+; CHECK-NEXT:    ret
+entry:
+  %slice.max = add i32 %slice, 3
+  tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 %slice.max)
+  ret void
+}
+
 define  void @test_svzero_za64_vg4x2(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg4x2:
 ; CHECK:       // %bb.0: // %entry
@@ -80,6 +152,18 @@ entry:
   ret void
 }
 
+define  void @test_svzero_za64_vg4x2_offset(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg4x2_offset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add w8, w0, #1
+; CHECK-NEXT:    zero za.d[w8, 0:3, vgx2]
+; CHECK-NEXT:    ret
+entry:
+  %slice.max = add i32 %slice, 1
+  tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 %slice.max)
+  ret void
+}
+
 define  void @test_svzero_za64_vg4x4(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg4x4:
 ; CHECK:       // %bb.0: // %entry
@@ -91,4 +175,16 @@ entry:
   ret void
 }
 
+define  void @test_svzero_za64_vg4x4_offset(i32  %slice)  #0 {
+; CHECK-LABEL: test_svzero_za64_vg4x4_offset:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 0:3, vgx4]
+; CHECK-NEXT:    ret
+entry:
+  %slice.min = add i32 %slice, 0
+  tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 %slice.min)
+  ret void
+}
+
 attributes #0 = { nounwind "target-features" = "+sme2p1"}

>From 437268ff741f2c83803dcdce802859c5aeab1f5d Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 17 Apr 2024 12:50:24 +0000
Subject: [PATCH 4/4] FIx to ZERO tests to test if offset is working

---
 .../CodeGen/AArch64/sme2p1-intrinsics-zero.ll | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll
index 1df849f7feda30..ba77637580f4cb 100644
--- a/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll
+++ b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll
@@ -41,10 +41,10 @@ define  void @test_svzero_za64_vg1x4_offset(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg1x4_offset:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    zero za.d[w8, 0, vgx4]
+; CHECK-NEXT:    zero za.d[w8, 1, vgx4]
 ; CHECK-NEXT:    ret
 entry:
-  %slice.min = add i32 %slice, 0
+  %slice.min = add i32 %slice, 1
   tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 %slice.min)
   ret void
 }
@@ -63,11 +63,11 @@ entry:
 define  void @test_svzero_za64_vg2x1_offset(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg2x1_offset:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    add w8, w0, #7
-; CHECK-NEXT:    zero za.d[w8, 0:1]
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 6:7]
 ; CHECK-NEXT:    ret
 entry:
-  %slice.max = add i32 %slice, 7
+  %slice.max = add i32 %slice, 6
   tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 %slice.max)
   ret void
 }
@@ -86,11 +86,11 @@ entry:
 define  void @test_svzero_za64_vg2x2_offset(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg2x2_offset:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    add w8, w0, #3
-; CHECK-NEXT:    zero za.d[w8, 0:1, vgx2]
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 2:3, vgx2]
 ; CHECK-NEXT:    ret
 entry:
-  %slice.max = add i32 %slice, 3
+  %slice.max = add i32 %slice, 2
   tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 %slice.max)
   ret void
 }
@@ -109,11 +109,11 @@ entry:
 define  void @test_svzero_za64_vg2x4_offset(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg2x4_offset:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    add w8, w0, #1
 ; CHECK-NEXT:    zero za.d[w8, 0:1, vgx4]
 ; CHECK-NEXT:    ret
 entry:
-  %slice.min = add i32 %slice, 0
+  %slice.min = add i32 %slice, 1
   tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 %slice.min)
   ret void
 }
@@ -132,11 +132,11 @@ entry:
 define  void @test_svzero_za64_vg4x1_offset(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg4x1_offset:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    add w8, w0, #3
-; CHECK-NEXT:    zero za.d[w8, 0:3]
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    zero za.d[w8, 4:7]
 ; CHECK-NEXT:    ret
 entry:
-  %slice.max = add i32 %slice, 3
+  %slice.max = add i32 %slice, 4
   tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 %slice.max)
   ret void
 }
@@ -155,11 +155,11 @@ entry:
 define  void @test_svzero_za64_vg4x2_offset(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg4x2_offset:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    add w8, w0, #1
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    zero za.d[w8, 0:3, vgx2]
 ; CHECK-NEXT:    ret
 entry:
-  %slice.max = add i32 %slice, 1
+  %slice.max = add i32 %slice, 0
   tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 %slice.max)
   ret void
 }
@@ -178,11 +178,11 @@ entry:
 define  void @test_svzero_za64_vg4x4_offset(i32  %slice)  #0 {
 ; CHECK-LABEL: test_svzero_za64_vg4x4_offset:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    add w8, w0, #1
 ; CHECK-NEXT:    zero za.d[w8, 0:3, vgx4]
 ; CHECK-NEXT:    ret
 entry:
-  %slice.min = add i32 %slice, 0
+  %slice.min = add i32 %slice, 1
   tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 %slice.min)
   ret void
 }