[clang] [llvm] [AArch64][SME] Implement inline-asm clobbers for za/zt0 (PR #79276)
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 1 00:28:39 PST 2024
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/79276
>From e98987ebb48839ea652d63dfaa62ed841b426e46 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Thu, 18 Jan 2024 15:41:25 +0000
Subject: [PATCH 1/3] [AArch64][SME] Implement inline-asm clobbers for za/zt0
This enables specifying "za" or "zt0" in the clobber list
for inline asm. This complies with the ACLE SME addition to the
asm extension here:
https://github.com/ARM-software/acle/pull/276
---
clang/lib/Basic/Targets/AArch64.cpp | 5 ++++-
clang/test/CodeGen/aarch64-inline-asm.c | 8 ++++++++
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 ++++++++
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 4 ++++
llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll | 16 ++++++++++++++++
5 files changed, 40 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index d47181bfca4fc..781118c935898 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1200,7 +1200,10 @@ const char *const AArch64TargetInfo::GCCRegNames[] = {
// SVE predicate-as-counter registers
"pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8",
- "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15"
+ "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15",
+
+ // SME registers
+ "za", "zt0",
};
ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c
index 75e9a8c46b876..8ddee560b11da 100644
--- a/clang/test/CodeGen/aarch64-inline-asm.c
+++ b/clang/test/CodeGen/aarch64-inline-asm.c
@@ -95,3 +95,11 @@ void test_reduced_gpr_constraints(int var32, long var64) {
// CHECK: [[ARG2:%.+]] = load i64, ptr
// CHECK: call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 [[ARG2]])
}
+
+void test_sme_constraints(){
+ asm("movt zt0[3, mul vl], z0" : : : "za");
+// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"()
+
+ asm("movt zt0[3, mul vl], z0" : : : "zt0");
+// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"()
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 332fb37655288..6a210846cf4df 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10702,6 +10702,14 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
parseConstraintCode(Constraint) != AArch64CC::Invalid)
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
+ if (StringRef("{za}").equals_insensitive(Constraint)){
+ return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
+ }
+
+ if (StringRef("{zt0}").equals_insensitive(Constraint)){
+ return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
+ }
+
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ea9882160d6fb..7d6b86ab8a3e9 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -507,6 +507,10 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16))
return true;
+ // ZA/ZT0 registers are reserved but may be permitted in the clobber list.
+ if (PhysReg.id() == AArch64::ZA || PhysReg.id() == AArch64::ZT0)
+ return true;
+
return !isReservedReg(MF, PhysReg);
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
new file mode 100644
index 0000000000000..a8cba7dc9a91e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -stop-after=aarch64-isel < %s -o - | FileCheck %s
+
+define void @alpha(<vscale x 4 x i32> %x) local_unnamed_addr {
+entry:
+; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $za
+ tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"()
+ ret void
+}
+
+define void @beta(<vscale x 4 x i32> %x) local_unnamed_addr {
+entry:
+; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $zt0
+ tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"()
+ ret void
+}
>From 6391def8b7cfd88b12544766c94b75cb2a5bd385 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Mon, 29 Jan 2024 09:59:47 +0000
Subject: [PATCH 2/3] run clang-format
---
clang/lib/Basic/Targets/AArch64.cpp | 249 ++++++++++++++++--
.../Target/AArch64/AArch64ISelLowering.cpp | 4 +-
2 files changed, 228 insertions(+), 25 deletions(-)
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 781118c935898..e15242e79e26d 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1165,45 +1165,248 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const {
const char *const AArch64TargetInfo::GCCRegNames[] = {
// 32-bit Integer registers
- "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11",
- "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22",
- "w23", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp",
+ "w0",
+ "w1",
+ "w2",
+ "w3",
+ "w4",
+ "w5",
+ "w6",
+ "w7",
+ "w8",
+ "w9",
+ "w10",
+ "w11",
+ "w12",
+ "w13",
+ "w14",
+ "w15",
+ "w16",
+ "w17",
+ "w18",
+ "w19",
+ "w20",
+ "w21",
+ "w22",
+ "w23",
+ "w24",
+ "w25",
+ "w26",
+ "w27",
+ "w28",
+ "w29",
+ "w30",
+ "wsp",
// 64-bit Integer registers
- "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
- "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22",
- "x23", "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp",
+ "x0",
+ "x1",
+ "x2",
+ "x3",
+ "x4",
+ "x5",
+ "x6",
+ "x7",
+ "x8",
+ "x9",
+ "x10",
+ "x11",
+ "x12",
+ "x13",
+ "x14",
+ "x15",
+ "x16",
+ "x17",
+ "x18",
+ "x19",
+ "x20",
+ "x21",
+ "x22",
+ "x23",
+ "x24",
+ "x25",
+ "x26",
+ "x27",
+ "x28",
+ "fp",
+ "lr",
+ "sp",
// 32-bit floating point regsisters
- "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
- "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
- "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+ "s0",
+ "s1",
+ "s2",
+ "s3",
+ "s4",
+ "s5",
+ "s6",
+ "s7",
+ "s8",
+ "s9",
+ "s10",
+ "s11",
+ "s12",
+ "s13",
+ "s14",
+ "s15",
+ "s16",
+ "s17",
+ "s18",
+ "s19",
+ "s20",
+ "s21",
+ "s22",
+ "s23",
+ "s24",
+ "s25",
+ "s26",
+ "s27",
+ "s28",
+ "s29",
+ "s30",
+ "s31",
// 64-bit floating point regsisters
- "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11",
- "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22",
- "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+ "d0",
+ "d1",
+ "d2",
+ "d3",
+ "d4",
+ "d5",
+ "d6",
+ "d7",
+ "d8",
+ "d9",
+ "d10",
+ "d11",
+ "d12",
+ "d13",
+ "d14",
+ "d15",
+ "d16",
+ "d17",
+ "d18",
+ "d19",
+ "d20",
+ "d21",
+ "d22",
+ "d23",
+ "d24",
+ "d25",
+ "d26",
+ "d27",
+ "d28",
+ "d29",
+ "d30",
+ "d31",
// Neon vector registers
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22",
- "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
+ "v0",
+ "v1",
+ "v2",
+ "v3",
+ "v4",
+ "v5",
+ "v6",
+ "v7",
+ "v8",
+ "v9",
+ "v10",
+ "v11",
+ "v12",
+ "v13",
+ "v14",
+ "v15",
+ "v16",
+ "v17",
+ "v18",
+ "v19",
+ "v20",
+ "v21",
+ "v22",
+ "v23",
+ "v24",
+ "v25",
+ "v26",
+ "v27",
+ "v28",
+ "v29",
+ "v30",
+ "v31",
// SVE vector registers
- "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10",
- "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21",
- "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31",
+ "z0",
+ "z1",
+ "z2",
+ "z3",
+ "z4",
+ "z5",
+ "z6",
+ "z7",
+ "z8",
+ "z9",
+ "z10",
+ "z11",
+ "z12",
+ "z13",
+ "z14",
+ "z15",
+ "z16",
+ "z17",
+ "z18",
+ "z19",
+ "z20",
+ "z21",
+ "z22",
+ "z23",
+ "z24",
+ "z25",
+ "z26",
+ "z27",
+ "z28",
+ "z29",
+ "z30",
+ "z31",
// SVE predicate registers
- "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10",
- "p11", "p12", "p13", "p14", "p15",
+ "p0",
+ "p1",
+ "p2",
+ "p3",
+ "p4",
+ "p5",
+ "p6",
+ "p7",
+ "p8",
+ "p9",
+ "p10",
+ "p11",
+ "p12",
+ "p13",
+ "p14",
+ "p15",
// SVE predicate-as-counter registers
- "pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8",
- "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15",
+ "pn0",
+ "pn1",
+ "pn2",
+ "pn3",
+ "pn4",
+ "pn5",
+ "pn6",
+ "pn7",
+ "pn8",
+ "pn9",
+ "pn10",
+ "pn11",
+ "pn12",
+ "pn13",
+ "pn14",
+ "pn15",
// SME registers
- "za", "zt0",
+ "za",
+ "zt0",
};
ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6a210846cf4df..e86d57968a46f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10702,11 +10702,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
parseConstraintCode(Constraint) != AArch64CC::Invalid)
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
- if (StringRef("{za}").equals_insensitive(Constraint)){
+ if (StringRef("{za}").equals_insensitive(Constraint)) {
return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
}
- if (StringRef("{zt0}").equals_insensitive(Constraint)){
+ if (StringRef("{zt0}").equals_insensitive(Constraint)) {
return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
}
>From 4beffbf7dd9d52bd7b73f589a94ee92517a24e63 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Wed, 31 Jan 2024 23:09:05 +0000
Subject: [PATCH 3/3] Disable clang-format for GCCRegNames contents
---
clang/lib/Basic/Targets/AArch64.cpp | 253 +++-------------------------
1 file changed, 27 insertions(+), 226 deletions(-)
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index e15242e79e26d..dbf0fdad15dc8 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1164,249 +1164,50 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const {
}
const char *const AArch64TargetInfo::GCCRegNames[] = {
+ // clang-format off
+
// 32-bit Integer registers
- "w0",
- "w1",
- "w2",
- "w3",
- "w4",
- "w5",
- "w6",
- "w7",
- "w8",
- "w9",
- "w10",
- "w11",
- "w12",
- "w13",
- "w14",
- "w15",
- "w16",
- "w17",
- "w18",
- "w19",
- "w20",
- "w21",
- "w22",
- "w23",
- "w24",
- "w25",
- "w26",
- "w27",
- "w28",
- "w29",
- "w30",
- "wsp",
+ "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11",
+ "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22",
+ "w23", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp",
// 64-bit Integer registers
- "x0",
- "x1",
- "x2",
- "x3",
- "x4",
- "x5",
- "x6",
- "x7",
- "x8",
- "x9",
- "x10",
- "x11",
- "x12",
- "x13",
- "x14",
- "x15",
- "x16",
- "x17",
- "x18",
- "x19",
- "x20",
- "x21",
- "x22",
- "x23",
- "x24",
- "x25",
- "x26",
- "x27",
- "x28",
- "fp",
- "lr",
- "sp",
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
+ "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22",
+ "x23", "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp",
// 32-bit floating point regsisters
- "s0",
- "s1",
- "s2",
- "s3",
- "s4",
- "s5",
- "s6",
- "s7",
- "s8",
- "s9",
- "s10",
- "s11",
- "s12",
- "s13",
- "s14",
- "s15",
- "s16",
- "s17",
- "s18",
- "s19",
- "s20",
- "s21",
- "s22",
- "s23",
- "s24",
- "s25",
- "s26",
- "s27",
- "s28",
- "s29",
- "s30",
- "s31",
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
+ "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
+ "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
// 64-bit floating point regsisters
- "d0",
- "d1",
- "d2",
- "d3",
- "d4",
- "d5",
- "d6",
- "d7",
- "d8",
- "d9",
- "d10",
- "d11",
- "d12",
- "d13",
- "d14",
- "d15",
- "d16",
- "d17",
- "d18",
- "d19",
- "d20",
- "d21",
- "d22",
- "d23",
- "d24",
- "d25",
- "d26",
- "d27",
- "d28",
- "d29",
- "d30",
- "d31",
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11",
+ "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22",
+ "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
// Neon vector registers
- "v0",
- "v1",
- "v2",
- "v3",
- "v4",
- "v5",
- "v6",
- "v7",
- "v8",
- "v9",
- "v10",
- "v11",
- "v12",
- "v13",
- "v14",
- "v15",
- "v16",
- "v17",
- "v18",
- "v19",
- "v20",
- "v21",
- "v22",
- "v23",
- "v24",
- "v25",
- "v26",
- "v27",
- "v28",
- "v29",
- "v30",
- "v31",
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
+ "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22",
+ "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
// SVE vector registers
- "z0",
- "z1",
- "z2",
- "z3",
- "z4",
- "z5",
- "z6",
- "z7",
- "z8",
- "z9",
- "z10",
- "z11",
- "z12",
- "z13",
- "z14",
- "z15",
- "z16",
- "z17",
- "z18",
- "z19",
- "z20",
- "z21",
- "z22",
- "z23",
- "z24",
- "z25",
- "z26",
- "z27",
- "z28",
- "z29",
- "z30",
- "z31",
+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10",
+ "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21",
+ "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31",
// SVE predicate registers
- "p0",
- "p1",
- "p2",
- "p3",
- "p4",
- "p5",
- "p6",
- "p7",
- "p8",
- "p9",
- "p10",
- "p11",
- "p12",
- "p13",
- "p14",
- "p15",
+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10",
+ "p11", "p12", "p13", "p14", "p15",
// SVE predicate-as-counter registers
- "pn0",
- "pn1",
- "pn2",
- "pn3",
- "pn4",
- "pn5",
- "pn6",
- "pn7",
- "pn8",
- "pn9",
- "pn10",
- "pn11",
- "pn12",
- "pn13",
- "pn14",
- "pn15",
+ "pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8",
+ "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15",
// SME registers
- "za",
- "zt0",
+ "za", "zt0",
+
+ // clang-format on
};
ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
More information about the llvm-commits
mailing list