[clang] 5fe7ae8 - [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (#72849)
Matt Devereau via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 1 01:35:36 PST 2023
Author: Matt Devereau
Date: 2023-12-01T09:34:38Z
New Revision: 5fe7ae848cc6cb2afc3aab332743ffa2bb635fc3
URL: https://github.com/llvm/llvm-project/commit/5fe7ae848cc6cb2afc3aab332743ffa2bb635fc3
DIFF: https://github.com/llvm/llvm-project/commit/5fe7ae848cc6cb2afc3aab332743ffa2bb635fc3.diff
LOG: [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (#72849)
Adds the builtins:
void svldr_zt(uint64_t zt, const void *rn)
void svstr_zt(uint64_t zt, void *rn)
And the intrinsics:
call void @llvm.aarch64.sme.ldr.zt(i32, ptr)
tail call void @llvm.aarch64.sme.str.zt(i32, ptr)
Patch by: Kerry McLaughlin kerry.mclaughlin at arm.com
Added:
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c
llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll
Modified:
clang/include/clang/Basic/arm_sme.td
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
llvm/lib/Target/AArch64/SMEInstrFormats.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index d55deeaa40bbcd5..7aae3c832bb1fe2 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -314,3 +314,11 @@ let TargetGuard = "sme2" in {
def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
}
+
+//
+// Spill and fill of ZT0
+//
+let TargetGuard = "sme2" in {
+ def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
+ def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c
new file mode 100644
index 000000000000000..126a4fc1045853f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c
@@ -0,0 +1,41 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+// LDR ZT0
+
+// CHECK-LABEL: @test_svldr_zt(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za {
+ svldr_zt(0, base);
+}
+
+// STR ZT0
+
+// CHECK-LABEL: @test_svstr_zt(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za {
+ svstr_zt(0, base);
+}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 4c35a238d9f9e2c..70987ad395f735a 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
-// RUN: -target-feature +sve2 -target-feature +sme2 -target-feature +sve -fsyntax-only -verify %s
+// RUN: -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -fsyntax-only -verify %s
// REQUIRES: aarch64-registered-target
@@ -19,3 +19,8 @@ void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t
svbmops_za32_u32_m(4, pred, pred, u32, u32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}
+
+void test_ldr_str_zt(const void *const_base, void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za {
+ svldr_zt(1, const_base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+ svstr_zt(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 136512db123b30a..2f49e9a6b37cc31 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -326,9 +326,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
- template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
+ template <unsigned BaseReg, unsigned Max>
+ bool ImmToTile(SDValue N, SDValue &Imm) {
if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
uint64_t C = CI->getZExtValue();
+
+ if (C > Max)
+ return false;
+
Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cb093a1613110e8..4379c3fde6f3c5d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2753,6 +2753,20 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
return BB;
}
+MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ bool IsSpill) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB;
+ unsigned Opc = IsSpill ? AArch64::STR_TX : AArch64::LDR_TX;
+ auto Rs = IsSpill ? RegState::Kill : RegState::Define;
+ MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+ MIB.addReg(MI.getOperand(0).getReg(), Rs);
+ MIB.add(MI.getOperand(1)); // Base
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
MachineBasicBlock *
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
@@ -2869,6 +2883,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
case AArch64::LDR_ZA_PSEUDO:
return EmitFill(MI, BB);
+ case AArch64::LDR_TX_PSEUDO:
+ return EmitZTSpillFill(MI, BB, /*IsSpill=*/false);
+ case AArch64::STR_TX_PSEUDO:
+ return EmitZTSpillFill(MI, BB, /*IsSpill=*/true);
case AArch64::ZERO_M_PSEUDO:
return EmitZero(MI, BB);
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 25d7cb6d212d1f4..009f8744b408a9d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -623,6 +623,8 @@ class AArch64TargetLowering : public TargetLowering {
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
MachineInstr &MI, MachineBasicBlock *BB,
bool HasTile) const;
+ MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB,
+ bool IsSpill) const;
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ed64a7b4984c17c..24ba9dd95004c6f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -440,6 +440,12 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
Reserved.set(SubReg);
}
+ if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
+ for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
+ SubReg.isValid(); ++SubReg)
+ Reserved.set(*SubReg);
+ }
+
markSuperRegs(Reserved, AArch64::FPCR);
if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index bb9464a8d2e1cf2..fcfa5f82a3809c2 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -541,8 +541,8 @@ defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops
def ZERO_T : sme2_zero_zt<"zero", 0b0001>;
-def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>;
-def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>;
+defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>;
+defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>;
def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 6c9b1f11a4decde..ef9c323e25bc358 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -10,11 +10,12 @@
//
//===----------------------------------------------------------------------===//
-def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
-def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
-def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
-def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
-def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
+def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0, 0>", []>;
+def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0, 1>", []>;
+def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0, 3>", []>;
+def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0, 7>", []>;
+def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0, 15>", []>;
+def imm_to_zt : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZT0, 0>", []>;
def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
@@ -3137,6 +3138,18 @@ class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
let mayStore = opc{7};
}
+
+multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc, SDPatternOperator op> {
+ def NAME : sme2_spill_fill_vector<mnemonic, opc>;
+ def NAME # _PSEUDO
+ : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> {
+ // Translated to actual instruction in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+ }
+ def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base),
+ (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>;
+}
+
//===----------------------------------------------------------------------===///
// SME2 move to/from lookup table
class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll
new file mode 100644
index 000000000000000..30205d86f2fb203
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+
+; LDR
+
+define void @ldr_zt0(ptr %ptr) {
+; CHECK-LABEL: ldr_zt0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr zt0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %ptr)
+ ret void;
+}
+
+; STR
+
+define void @str_zt0(ptr %ptr) {
+; CHECK-LABEL: str_zt0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str zt0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.str.zt(i32 0, ptr %ptr)
+ ret void;
+}
+
+declare void @llvm.aarch64.sme.ldr.zt(i32, ptr)
+declare void @llvm.aarch64.sme.str.zt(i32, ptr)
More information about the cfe-commits
mailing list