[clang] [llvm] Add SME2 builtins for zero { zt0 } (PR #72274)

Matthew Devereau via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 1 05:49:42 PST 2023


https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72274

>From 86c61659cf99486965dffe201385b28420e93f41 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Thu, 9 Nov 2023 16:08:57 +0000
Subject: [PATCH 1/4] Add SME2 builtins for zero { zt0 }

Patch by: Kerry McLaughlin kerry.mclaughlin at arm.com
---
 clang/include/clang/Basic/arm_sme.td          |  5 +++
 .../acle_sme2_zero_zt.c                       | 32 +++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  4 +++
 .../Target/AArch64/AArch64ISelLowering.cpp    | 26 ++++++++-------
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  4 +--
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  2 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td    | 11 +++++++
 .../AArch64/sme2-intrinsics-zero-zt.ll        | 13 ++++++++
 8 files changed, 83 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 7aae3c832bb1fe2..48afd6431fc8b69 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -321,4 +321,9 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
   def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
+
+//
+// Zero ZT0
+//
+  def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
 }
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
new file mode 100644
index 000000000000000..4ea26119301cab2
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1) A1
+#else
+#define SVE_ACLE_FUNC(A1) A1
+#endif
+
+// CHECK-LABEL: @test_svzero_zt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.zt(i32 0)
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z14test_svzero_ztv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.zt(i32 0)
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za  {
+  svzero_zt(0);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 60a8d98f3bc0d26..3c0a07be50607bb 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3544,6 +3544,10 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic;
   def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic;
 
+  //
+  //  Zero ZT0
+  //
+  def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrWriteMem]>;
 }
 
 // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4379c3fde6f3c5d..f0f0fe1e807b4be 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2753,17 +2753,19 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
   return BB;
 }
 
-MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI,
-                                                          MachineBasicBlock *BB,
-                                                          bool IsSpill) const {
+MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
+                                                      MachineBasicBlock *BB,
+                                                      unsigned Opcode,
+                                                      bool Op0IsDef) const {
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
   MachineInstrBuilder MIB;
-  unsigned Opc = IsSpill ? AArch64::STR_TX : AArch64::LDR_TX;
-  auto Rs = IsSpill ? RegState::Kill : RegState::Define;
-  MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
-  MIB.addReg(MI.getOperand(0).getReg(), Rs);
-  MIB.add(MI.getOperand(1)); // Base
-  MI.eraseFromParent();      // The pseudo is gone now.
+
+  MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode))
+            .addReg(MI.getOperand(0).getReg(), IsZTDest ? RegState::Define : 0);
+  for (unsigned I = 1; I < MI.getNumOperands(); ++I)
+    MIB.add(MI.getOperand(I));
+
+  MI.eraseFromParent(); // The pseudo is gone now.
   return BB;
 }
 
@@ -2884,11 +2886,13 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
   case AArch64::LDR_ZA_PSEUDO:
     return EmitFill(MI, BB);
   case AArch64::LDR_TX_PSEUDO:
-    return EmitZTSpillFill(MI, BB, /*IsSpill=*/false);
+    return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*IsZTDest=*/true);
   case AArch64::STR_TX_PSEUDO:
-    return EmitZTSpillFill(MI, BB, /*IsSpill=*/true);
+    return EmitZTInstr(MI, BB, AArch64::STR_TX, /*IsZTDest=*/false);
   case AArch64::ZERO_M_PSEUDO:
     return EmitZero(MI, BB);
+  case AArch64::ZERO_T_PSEUDO:
+    return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*IsZTDest=*/true);
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 009f8744b408a9d..092e19c962a1a61 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -623,8 +623,8 @@ class AArch64TargetLowering : public TargetLowering {
   MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI, MachineBasicBlock *BB,
                                  bool HasTile) const;
-  MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB,
-                                     bool IsSpill) const;
+  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
+                                 unsigned Opcode, bool IsZTDest) const;
   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
 
   MachineBasicBlock *
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index fcfa5f82a3809c2..84ec88d4fd49b69 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -539,7 +539,7 @@ defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001, int_aarch64_sme_smops
 defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100, int_aarch64_sme_umopa_za32>;
 defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops_za32>;
 
-def ZERO_T : sme2_zero_zt<"zero", 0b0001>;
+defm ZERO_T : sme2_zero_zt<"zero", 0b0001>;
 
 defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>;
 defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index ef9c323e25bc358..c13c1b4e81faade 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3119,6 +3119,17 @@ class sme2_zero_zt<string mnemonic, bits<4> opc>
   let Inst{3-0}  = opc;
 }
 
+multiclass sme2_zero_zt<string mnemonic, bits<4> opc> {
+  def NAME : sme2_zero_zt<mnemonic, opc>;
+  def NAME # _PSEUDO
+        : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> {
+    // Translated to actual instruction in AArch64ISelLowering.cpp
+    let usesCustomInserter = 1;
+  }
+  def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)),
+          (!cast<Instruction>(NAME # _PSEUDO) $zt)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SME2 lookup table load/store
 class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll
new file mode 100644
index 000000000000000..14a4dba2466bf3e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s
+
+define void @zero_zt0() {
+; CHECK-LABEL: zero_zt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zero { zt0 }
+; CHECK-NEXT:    ret
+    call void @llvm.aarch64.sme.zero.zt(i32 0)
+    ret void
+}
+
+declare void @llvm.aarch64.sme.zero.zt(i32)

>From a9a5e7c6169efd3794f8c7ab286a6da608f8f829 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Fri, 1 Dec 2023 13:19:19 +0000
Subject: [PATCH 2/4] Removes IsPreservesZA and refactor EmitZTInstr

---
 clang/include/clang/Basic/arm_sme.td            | 2 +-
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 48afd6431fc8b69..34dbfff6c4c85cf 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -325,5 +325,5 @@ let TargetGuard = "sme2" in {
 //
 // Zero ZT0
 //
-  def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
+  def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA], [ImmCheck<0, ImmCheck0_0>]>;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f0f0fe1e807b4be..68fa58dea5beb1f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2761,7 +2761,7 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
   MachineInstrBuilder MIB;
 
   MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode))
-            .addReg(MI.getOperand(0).getReg(), IsZTDest ? RegState::Define : 0);
+            .addReg(MI.getOperand(0).getReg(), Op0IsDef ? RegState::Define : 0);
   for (unsigned I = 1; I < MI.getNumOperands(); ++I)
     MIB.add(MI.getOperand(I));
 
@@ -2886,13 +2886,13 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
   case AArch64::LDR_ZA_PSEUDO:
     return EmitFill(MI, BB);
   case AArch64::LDR_TX_PSEUDO:
-    return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*IsZTDest=*/true);
+    return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*Op0IsDef=*/true);
   case AArch64::STR_TX_PSEUDO:
-    return EmitZTInstr(MI, BB, AArch64::STR_TX, /*IsZTDest=*/false);
+    return EmitZTInstr(MI, BB, AArch64::STR_TX, /*Op0IsDef=*/false);
   case AArch64::ZERO_M_PSEUDO:
     return EmitZero(MI, BB);
   case AArch64::ZERO_T_PSEUDO:
-    return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*IsZTDest=*/true);
+    return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
   }
 }
 

>From 73bbd9c6b9548a19e0248ad1747881ae2665a5ac Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Fri, 1 Dec 2023 13:28:45 +0000
Subject: [PATCH 3/4] Remove overloaded tests and rename operand in
 AArch64ISelLowering.h

---
 .../CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c  | 9 ---------
 llvm/lib/Target/AArch64/AArch64ISelLowering.h            | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
index 4ea26119301cab2..35d35ec6f50b7f7 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
@@ -4,19 +4,10 @@
 
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sme_draft_spec_subject_to_change.h>
 
-#ifdef SVE_OVERLOADED_FORMS
-// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1) A1
-#else
-#define SVE_ACLE_FUNC(A1) A1
-#endif
-
 // CHECK-LABEL: @test_svzero_zt(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.zt(i32 0)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 092e19c962a1a61..2b16d2471770d08 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -624,7 +624,7 @@ class AArch64TargetLowering : public TargetLowering {
                                  MachineInstr &MI, MachineBasicBlock *BB,
                                  bool HasTile) const;
   MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
-                                 unsigned Opcode, bool IsZTDest) const;
+                                 unsigned Opcode, bool Op0IsDef) const;
   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
 
   MachineBasicBlock *

>From 0653fd18beb9ee52540cf6e0614f436a0638c0e6 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Fri, 1 Dec 2023 13:43:55 +0000
Subject: [PATCH 4/4] Remove __arm_preserves_za attribute from test

---
 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
index 35d35ec6f50b7f7..31e8d6850fb289d 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
@@ -18,6 +18,6 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.zt(i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za  {
+void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za {
   svzero_zt(0);
 }



More information about the cfe-commits mailing list