[clang] [llvm] [AArch64][SME] Implement inline-asm clobbers for za/zt0 (PR #79276)

Matthew Devereau via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 1 00:28:39 PST 2024


https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/79276

>From e98987ebb48839ea652d63dfaa62ed841b426e46 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Thu, 18 Jan 2024 15:41:25 +0000
Subject: [PATCH 1/3] [AArch64][SME] Implement inline-asm clobbers for za/zt0

This enables specifying "za" or "zt0" in the clobber list
for inline asm. This complies with the ACLE SME addition to the
asm extension here:
https://github.com/ARM-software/acle/pull/276
---
 clang/lib/Basic/Targets/AArch64.cpp             |  5 ++++-
 clang/test/CodeGen/aarch64-inline-asm.c         |  8 ++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |  8 ++++++++
 llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp |  4 ++++
 llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll | 16 ++++++++++++++++
 5 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index d47181bfca4fc..781118c935898 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1200,7 +1200,10 @@ const char *const AArch64TargetInfo::GCCRegNames[] = {
 
     // SVE predicate-as-counter registers
     "pn0",  "pn1",  "pn2",  "pn3",  "pn4",  "pn5",  "pn6",  "pn7",  "pn8",
-    "pn9",  "pn10", "pn11", "pn12", "pn13", "pn14", "pn15"
+    "pn9",  "pn10", "pn11", "pn12", "pn13", "pn14", "pn15",
+
+    // SME registers
+    "za", "zt0",
 };
 
 ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c
index 75e9a8c46b876..8ddee560b11da 100644
--- a/clang/test/CodeGen/aarch64-inline-asm.c
+++ b/clang/test/CodeGen/aarch64-inline-asm.c
@@ -95,3 +95,11 @@ void test_reduced_gpr_constraints(int var32, long var64) {
 // CHECK: [[ARG2:%.+]] = load i64, ptr
 // CHECK: call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 [[ARG2]])
 }
+
+void test_sme_constraints(){
+  asm("movt zt0[3, mul vl], z0" : : : "za");
+// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"()
+
+  asm("movt zt0[3, mul vl], z0" : : : "zt0");
+// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"()
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 332fb37655288..6a210846cf4df 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10702,6 +10702,14 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
       parseConstraintCode(Constraint) != AArch64CC::Invalid)
     return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
 
+  if (StringRef("{za}").equals_insensitive(Constraint)){
+    return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
+  }
+
+  if (StringRef("{zt0}").equals_insensitive(Constraint)){
+    return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
+  }
+
   // Use the default implementation in TargetLowering to convert the register
   // constraint into a member of a register class.
   std::pair<unsigned, const TargetRegisterClass *> Res;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ea9882160d6fb..7d6b86ab8a3e9 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -507,6 +507,10 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
         MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16))
     return true;
 
+  // ZA/ZT0 registers are reserved but may be permitted in the clobber list.
+  if (PhysReg.id() == AArch64::ZA || PhysReg.id() == AArch64::ZT0)
+    return true;
+
   return !isReservedReg(MF, PhysReg);
 }
 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
new file mode 100644
index 0000000000000..a8cba7dc9a91e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -stop-after=aarch64-isel < %s -o - | FileCheck %s
+
+define void @alpha(<vscale x 4 x i32> %x) local_unnamed_addr {
+entry:
+; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $za
+  tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"()
+  ret void
+}
+
+define void @beta(<vscale x 4 x i32> %x) local_unnamed_addr {
+entry:
+; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $zt0
+  tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"()
+  ret void
+}

>From 6391def8b7cfd88b12544766c94b75cb2a5bd385 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Mon, 29 Jan 2024 09:59:47 +0000
Subject: [PATCH 2/3] run clang-format

---
 clang/lib/Basic/Targets/AArch64.cpp           | 249 ++++++++++++++++--
 .../Target/AArch64/AArch64ISelLowering.cpp    |   4 +-
 2 files changed, 228 insertions(+), 25 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 781118c935898..e15242e79e26d 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1165,45 +1165,248 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const {
 
 const char *const AArch64TargetInfo::GCCRegNames[] = {
     // 32-bit Integer registers
-    "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11",
-    "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22",
-    "w23", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp",
+    "w0",
+    "w1",
+    "w2",
+    "w3",
+    "w4",
+    "w5",
+    "w6",
+    "w7",
+    "w8",
+    "w9",
+    "w10",
+    "w11",
+    "w12",
+    "w13",
+    "w14",
+    "w15",
+    "w16",
+    "w17",
+    "w18",
+    "w19",
+    "w20",
+    "w21",
+    "w22",
+    "w23",
+    "w24",
+    "w25",
+    "w26",
+    "w27",
+    "w28",
+    "w29",
+    "w30",
+    "wsp",
 
     // 64-bit Integer registers
-    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
-    "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22",
-    "x23", "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp",
+    "x0",
+    "x1",
+    "x2",
+    "x3",
+    "x4",
+    "x5",
+    "x6",
+    "x7",
+    "x8",
+    "x9",
+    "x10",
+    "x11",
+    "x12",
+    "x13",
+    "x14",
+    "x15",
+    "x16",
+    "x17",
+    "x18",
+    "x19",
+    "x20",
+    "x21",
+    "x22",
+    "x23",
+    "x24",
+    "x25",
+    "x26",
+    "x27",
+    "x28",
+    "fp",
+    "lr",
+    "sp",
 
     // 32-bit floating point regsisters
-    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
-    "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
-    "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+    "s0",
+    "s1",
+    "s2",
+    "s3",
+    "s4",
+    "s5",
+    "s6",
+    "s7",
+    "s8",
+    "s9",
+    "s10",
+    "s11",
+    "s12",
+    "s13",
+    "s14",
+    "s15",
+    "s16",
+    "s17",
+    "s18",
+    "s19",
+    "s20",
+    "s21",
+    "s22",
+    "s23",
+    "s24",
+    "s25",
+    "s26",
+    "s27",
+    "s28",
+    "s29",
+    "s30",
+    "s31",
 
     // 64-bit floating point regsisters
-    "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11",
-    "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22",
-    "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+    "d0",
+    "d1",
+    "d2",
+    "d3",
+    "d4",
+    "d5",
+    "d6",
+    "d7",
+    "d8",
+    "d9",
+    "d10",
+    "d11",
+    "d12",
+    "d13",
+    "d14",
+    "d15",
+    "d16",
+    "d17",
+    "d18",
+    "d19",
+    "d20",
+    "d21",
+    "d22",
+    "d23",
+    "d24",
+    "d25",
+    "d26",
+    "d27",
+    "d28",
+    "d29",
+    "d30",
+    "d31",
 
     // Neon vector registers
-    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
-    "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22",
-    "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
+    "v0",
+    "v1",
+    "v2",
+    "v3",
+    "v4",
+    "v5",
+    "v6",
+    "v7",
+    "v8",
+    "v9",
+    "v10",
+    "v11",
+    "v12",
+    "v13",
+    "v14",
+    "v15",
+    "v16",
+    "v17",
+    "v18",
+    "v19",
+    "v20",
+    "v21",
+    "v22",
+    "v23",
+    "v24",
+    "v25",
+    "v26",
+    "v27",
+    "v28",
+    "v29",
+    "v30",
+    "v31",
 
     // SVE vector registers
-    "z0",  "z1",  "z2",  "z3",  "z4",  "z5",  "z6",  "z7",  "z8",  "z9",  "z10",
-    "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21",
-    "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31",
+    "z0",
+    "z1",
+    "z2",
+    "z3",
+    "z4",
+    "z5",
+    "z6",
+    "z7",
+    "z8",
+    "z9",
+    "z10",
+    "z11",
+    "z12",
+    "z13",
+    "z14",
+    "z15",
+    "z16",
+    "z17",
+    "z18",
+    "z19",
+    "z20",
+    "z21",
+    "z22",
+    "z23",
+    "z24",
+    "z25",
+    "z26",
+    "z27",
+    "z28",
+    "z29",
+    "z30",
+    "z31",
 
     // SVE predicate registers
-    "p0",  "p1",  "p2",  "p3",  "p4",  "p5",  "p6",  "p7",  "p8",  "p9",  "p10",
-    "p11", "p12", "p13", "p14", "p15",
+    "p0",
+    "p1",
+    "p2",
+    "p3",
+    "p4",
+    "p5",
+    "p6",
+    "p7",
+    "p8",
+    "p9",
+    "p10",
+    "p11",
+    "p12",
+    "p13",
+    "p14",
+    "p15",
 
     // SVE predicate-as-counter registers
-    "pn0",  "pn1",  "pn2",  "pn3",  "pn4",  "pn5",  "pn6",  "pn7",  "pn8",
-    "pn9",  "pn10", "pn11", "pn12", "pn13", "pn14", "pn15",
+    "pn0",
+    "pn1",
+    "pn2",
+    "pn3",
+    "pn4",
+    "pn5",
+    "pn6",
+    "pn7",
+    "pn8",
+    "pn9",
+    "pn10",
+    "pn11",
+    "pn12",
+    "pn13",
+    "pn14",
+    "pn15",
 
     // SME registers
-    "za", "zt0",
+    "za",
+    "zt0",
 };
 
 ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6a210846cf4df..e86d57968a46f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10702,11 +10702,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
       parseConstraintCode(Constraint) != AArch64CC::Invalid)
     return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
 
-  if (StringRef("{za}").equals_insensitive(Constraint)){
+  if (StringRef("{za}").equals_insensitive(Constraint)) {
     return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
   }
 
-  if (StringRef("{zt0}").equals_insensitive(Constraint)){
+  if (StringRef("{zt0}").equals_insensitive(Constraint)) {
     return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
   }
 

>From 4beffbf7dd9d52bd7b73f589a94ee92517a24e63 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Wed, 31 Jan 2024 23:09:05 +0000
Subject: [PATCH 3/3] Disable clang-format for GCCRegNames contents

---
 clang/lib/Basic/Targets/AArch64.cpp | 253 +++-------------------------
 1 file changed, 27 insertions(+), 226 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index e15242e79e26d..dbf0fdad15dc8 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1164,249 +1164,50 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const {
 }
 
 const char *const AArch64TargetInfo::GCCRegNames[] = {
+    // clang-format off
+
     // 32-bit Integer registers
-    "w0",
-    "w1",
-    "w2",
-    "w3",
-    "w4",
-    "w5",
-    "w6",
-    "w7",
-    "w8",
-    "w9",
-    "w10",
-    "w11",
-    "w12",
-    "w13",
-    "w14",
-    "w15",
-    "w16",
-    "w17",
-    "w18",
-    "w19",
-    "w20",
-    "w21",
-    "w22",
-    "w23",
-    "w24",
-    "w25",
-    "w26",
-    "w27",
-    "w28",
-    "w29",
-    "w30",
-    "wsp",
+    "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11",
+    "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22",
+    "w23", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp",
 
     // 64-bit Integer registers
-    "x0",
-    "x1",
-    "x2",
-    "x3",
-    "x4",
-    "x5",
-    "x6",
-    "x7",
-    "x8",
-    "x9",
-    "x10",
-    "x11",
-    "x12",
-    "x13",
-    "x14",
-    "x15",
-    "x16",
-    "x17",
-    "x18",
-    "x19",
-    "x20",
-    "x21",
-    "x22",
-    "x23",
-    "x24",
-    "x25",
-    "x26",
-    "x27",
-    "x28",
-    "fp",
-    "lr",
-    "sp",
+    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
+    "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22",
+    "x23", "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp",
 
     // 32-bit floating point regsisters
-    "s0",
-    "s1",
-    "s2",
-    "s3",
-    "s4",
-    "s5",
-    "s6",
-    "s7",
-    "s8",
-    "s9",
-    "s10",
-    "s11",
-    "s12",
-    "s13",
-    "s14",
-    "s15",
-    "s16",
-    "s17",
-    "s18",
-    "s19",
-    "s20",
-    "s21",
-    "s22",
-    "s23",
-    "s24",
-    "s25",
-    "s26",
-    "s27",
-    "s28",
-    "s29",
-    "s30",
-    "s31",
+    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
+    "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
+    "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
 
     // 64-bit floating point regsisters
-    "d0",
-    "d1",
-    "d2",
-    "d3",
-    "d4",
-    "d5",
-    "d6",
-    "d7",
-    "d8",
-    "d9",
-    "d10",
-    "d11",
-    "d12",
-    "d13",
-    "d14",
-    "d15",
-    "d16",
-    "d17",
-    "d18",
-    "d19",
-    "d20",
-    "d21",
-    "d22",
-    "d23",
-    "d24",
-    "d25",
-    "d26",
-    "d27",
-    "d28",
-    "d29",
-    "d30",
-    "d31",
+    "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11",
+    "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22",
+    "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
 
     // Neon vector registers
-    "v0",
-    "v1",
-    "v2",
-    "v3",
-    "v4",
-    "v5",
-    "v6",
-    "v7",
-    "v8",
-    "v9",
-    "v10",
-    "v11",
-    "v12",
-    "v13",
-    "v14",
-    "v15",
-    "v16",
-    "v17",
-    "v18",
-    "v19",
-    "v20",
-    "v21",
-    "v22",
-    "v23",
-    "v24",
-    "v25",
-    "v26",
-    "v27",
-    "v28",
-    "v29",
-    "v30",
-    "v31",
+    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
+    "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22",
+    "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
 
     // SVE vector registers
-    "z0",
-    "z1",
-    "z2",
-    "z3",
-    "z4",
-    "z5",
-    "z6",
-    "z7",
-    "z8",
-    "z9",
-    "z10",
-    "z11",
-    "z12",
-    "z13",
-    "z14",
-    "z15",
-    "z16",
-    "z17",
-    "z18",
-    "z19",
-    "z20",
-    "z21",
-    "z22",
-    "z23",
-    "z24",
-    "z25",
-    "z26",
-    "z27",
-    "z28",
-    "z29",
-    "z30",
-    "z31",
+    "z0",  "z1",  "z2",  "z3",  "z4",  "z5",  "z6",  "z7",  "z8",  "z9",  "z10",
+    "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21",
+    "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31",
 
     // SVE predicate registers
-    "p0",
-    "p1",
-    "p2",
-    "p3",
-    "p4",
-    "p5",
-    "p6",
-    "p7",
-    "p8",
-    "p9",
-    "p10",
-    "p11",
-    "p12",
-    "p13",
-    "p14",
-    "p15",
+    "p0",  "p1",  "p2",  "p3",  "p4",  "p5",  "p6",  "p7",  "p8",  "p9",  "p10",
+    "p11", "p12", "p13", "p14", "p15",
 
     // SVE predicate-as-counter registers
-    "pn0",
-    "pn1",
-    "pn2",
-    "pn3",
-    "pn4",
-    "pn5",
-    "pn6",
-    "pn7",
-    "pn8",
-    "pn9",
-    "pn10",
-    "pn11",
-    "pn12",
-    "pn13",
-    "pn14",
-    "pn15",
+    "pn0",  "pn1",  "pn2",  "pn3",  "pn4",  "pn5",  "pn6",  "pn7",  "pn8",
+    "pn9",  "pn10", "pn11", "pn12", "pn13", "pn14", "pn15",
 
     // SME registers
-    "za",
-    "zt0",
+    "za", "zt0",
+
+    // clang-format on
 };
 
 ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {



More information about the llvm-commits mailing list