[llvm] [clang] [LLVM][AArch64] Add ASM constraints for reduced GPR register ranges. (PR #70970)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Nov 2 05:48:39 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-ir
Author: Paul Walker (paulwalker-arm)
<details>
<summary>Changes</summary>
[LLVM][AArch64] Add ASM constraints for reduced GPR register ranges.
The patch adds the following ASM constraints:
Uci => w8-w11
Ucj => w12-w15
These constraints are required for SME load/store instructions
where a reduced set of GPRs are used to specify ZA array vectors.
NOTE: GCC has agreed to use the same constraint syntax.
---
Full diff: https://github.com/llvm/llvm-project/pull/70970.diff
6 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (+2)
- (modified) clang/lib/Basic/Targets/AArch64.cpp (+6)
- (modified) clang/test/CodeGen/aarch64-inline-asm.c (+15)
- (modified) llvm/docs/LangRef.rst (+2)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+45-13)
- (added) llvm/test/CodeGen/AArch64/inlineasm-Uc-constraint.ll (+78)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4696836b3a00caa..afe7e2e79c2d087 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -738,6 +738,8 @@ Arm and AArch64 Support
This affects C++ functions with SVE ACLE parameters. Clang will use the old
manglings if ``-fclang-abi-compat=17`` or lower is specified.
+- New AArch64 asm constraints have been added for r8-r11(Uci) and r12-r15(Ucj).
+
Android Support
^^^^^^^^^^^^^^^
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index fe5a7af97b7753c..c71af71eba60ce2 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1306,6 +1306,12 @@ bool AArch64TargetInfo::validateAsmConstraint(
Name += 2;
return true;
}
+ if (Name[1] == 'c' && (Name[2] == 'i' || Name[2] == 'j')) {
+ // Gpr registers ("Uci"=w8-11, "Ucj"=w12-15)
+ Info.setAllowsRegister();
+ Name += 2;
+ return true;
+ }
// Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes.
// Utf: A memory address suitable for ldp/stp in TF mode.
// Usa: An absolute symbolic address.
diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c
index 439fb9e33f9ae15..75e9a8c46b87692 100644
--- a/clang/test/CodeGen/aarch64-inline-asm.c
+++ b/clang/test/CodeGen/aarch64-inline-asm.c
@@ -80,3 +80,18 @@ void test_tied_earlyclobber(void) {
asm("" : "+&r"(a));
// CHECK: call i32 asm "", "=&{x1},0"(i32 %0)
}
+
+void test_reduced_gpr_constraints(int var32, long var64) {
+ asm("add w0, w0, %0" : : "Uci"(var32) : "w0");
+// CHECK: [[ARG1:%.+]] = load i32, ptr
+// CHECK: call void asm sideeffect "add w0, w0, $0", "@3Uci,~{w0}"(i32 [[ARG1]])
+ asm("add x0, x0, %0" : : "Uci"(var64) : "x0");
+// CHECK: [[ARG1:%.+]] = load i64, ptr
+// CHECK: call void asm sideeffect "add x0, x0, $0", "@3Uci,~{x0}"(i64 [[ARG1]])
+ asm("add w0, w0, %0" : : "Ucj"(var32) : "w0");
+// CHECK: [[ARG2:%.+]] = load i32, ptr
+// CHECK: call void asm sideeffect "add w0, w0, $0", "@3Ucj,~{w0}"(i32 [[ARG2]])
+ asm("add x0, x0, %0" : : "Ucj"(var64) : "x0");
+// CHECK: [[ARG2:%.+]] = load i64, ptr
+// CHECK: call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 [[ARG2]])
+}
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9c8e264eb9b9785..440ae0e292f3938 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5098,6 +5098,8 @@ AArch64:
offsets). (However, LLVM currently does this for the ``m`` constraint as
well.)
- ``r``: A 32 or 64-bit integer register (W* or X*).
+- ``Uci``: Like r, but restricted to registers 8 to 11 inclusive.
+- ``Ucj``: Like r, but restricted to registers 12 to 15 inclusive.
- ``w``: A 32, 64, or 128-bit floating-point, SIMD or SVE vector register.
- ``x``: Like w, but restricted to registers 0 to 15 inclusive.
- ``y``: Like w, but restricted to SVE vector registers Z0 to Z7 inclusive.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 291f0c8c5d991c6..f5193a9f2adf30c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10163,14 +10163,15 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
return "r";
}
-enum PredicateConstraint { Uph, Upl, Upa, Invalid };
+enum class PredicateConstraint { Uph, Upl, Upa };
-static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
- return StringSwitch<PredicateConstraint>(Constraint)
+static std::optional<PredicateConstraint>
+parsePredicateConstraint(StringRef Constraint) {
+ return StringSwitch<std::optional<PredicateConstraint>>(Constraint)
.Case("Uph", PredicateConstraint::Uph)
.Case("Upl", PredicateConstraint::Upl)
.Case("Upa", PredicateConstraint::Upa)
- .Default(PredicateConstraint::Invalid);
+ .Default(std::nullopt);
}
static const TargetRegisterClass *
@@ -10180,8 +10181,6 @@ getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT) {
return nullptr;
switch (Constraint) {
- default:
- return nullptr;
case PredicateConstraint::Uph:
return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
: &AArch64::PPR_p8to15RegClass;
@@ -10192,6 +10191,33 @@ getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT) {
return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
: &AArch64::PPRRegClass;
}
+
+ llvm_unreachable("Missing PredicateConstraint!");
+}
+
+enum class ReducedGprConstraint { Uci, Ucj };
+
+static std::optional<ReducedGprConstraint>
+parseReducedGprConstraint(StringRef Constraint) {
+ return StringSwitch<std::optional<ReducedGprConstraint>>(Constraint)
+ .Case("Uci", ReducedGprConstraint::Uci)
+ .Case("Ucj", ReducedGprConstraint::Ucj)
+ .Default(std::nullopt);
+}
+
+static const TargetRegisterClass *
+getReducedGprRegisterClass(ReducedGprConstraint Constraint, EVT VT) {
+ if (!VT.isScalarInteger() || VT.getFixedSizeInBits() > 64)
+ return nullptr;
+
+ switch (Constraint) {
+ case ReducedGprConstraint::Uci:
+ return &AArch64::MatrixIndexGPR32_8_11RegClass;
+ case ReducedGprConstraint::Ucj:
+ return &AArch64::MatrixIndexGPR32_12_15RegClass;
+ }
+
+ llvm_unreachable("Missing ReducedGprConstraint!");
}
// The set of cc code supported is from
@@ -10289,9 +10315,10 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
case 'S': // A symbolic address
return C_Other;
}
- } else if (parsePredicateConstraint(Constraint) !=
- PredicateConstraint::Invalid)
- return C_RegisterClass;
+ } else if (parsePredicateConstraint(Constraint))
+ return C_RegisterClass;
+ else if (parseReducedGprConstraint(Constraint))
+ return C_RegisterClass;
else if (parseConstraintCode(Constraint) != AArch64CC::Invalid)
return C_Other;
return TargetLowering::getConstraintType(Constraint);
@@ -10325,7 +10352,8 @@ AArch64TargetLowering::getSingleConstraintMatchWeight(
weight = CW_Constant;
break;
case 'U':
- if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
+ if (parsePredicateConstraint(constraint) ||
+ parseReducedGprConstraint(constraint))
weight = CW_Register;
break;
}
@@ -10382,9 +10410,13 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
break;
}
} else {
- PredicateConstraint PC = parsePredicateConstraint(Constraint);
- if (const TargetRegisterClass *RegClass = getPredicateRegisterClass(PC, VT))
- return std::make_pair(0U, RegClass);
+ if (const auto PC = parsePredicateConstraint(Constraint))
+ if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
+ return std::make_pair(0U, RegClass);
+
+ if (const auto RGC = parseReducedGprConstraint(Constraint))
+ if (const auto *RegClass = getReducedGprRegisterClass(*RGC, VT))
+ return std::make_pair(0U, RegClass);
}
if (StringRef("{cc}").equals_insensitive(Constraint) ||
parseConstraintCode(Constraint) != AArch64CC::Invalid)
diff --git a/llvm/test/CodeGen/AArch64/inlineasm-Uc-constraint.ll b/llvm/test/CodeGen/AArch64/inlineasm-Uc-constraint.ll
new file mode 100644
index 000000000000000..0bee7ea40cc1aeb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/inlineasm-Uc-constraint.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -o - | FileCheck %s
+
+target triple = "arm64-none-linux-gnu"
+
+define void @test_constraints_Uci_w(i32 %a) {
+; CHECK-LABEL: test_constraints_Uci_w:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: //APP
+; CHECK-NEXT: add x0, x0, x8
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+ call void asm sideeffect "add x0, x0, $0", "@3Uci,~{x0}"(i32 %a)
+ ret void
+}
+
+; As test_constraints_Uci_w but ensures non-legal types are also covered.
+define void @test_constraints_Uci_w_i8(i8 %a) {
+; CHECK-LABEL: test_constraints_Uci_w_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: //APP
+; CHECK-NEXT: add x0, x0, x8
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+ call void asm sideeffect "add x0, x0, $0", "@3Uci,~{x0}"(i8 %a)
+ ret void
+}
+
+define void @test_constraints_Uci_x(i64 %a) {
+; CHECK-LABEL: test_constraints_Uci_x:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: //APP
+; CHECK-NEXT: add x0, x0, x8
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+ call void asm sideeffect "add x0, x0, $0", "@3Uci,~{x0}"(i64 %a)
+ ret void
+}
+
+define void @test_constraint_Ucj_w(i32 %a) {
+; CHECK-LABEL: test_constraint_Ucj_w:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: //APP
+; CHECK-NEXT: add x0, x0, x12
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+ call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i32 %a)
+ ret void
+}
+
+; As test_constraints_Ucj_w but ensures non-legal types are also covered.
+define void @test_constraint_Ucj_w_i8(i8 %a) {
+; CHECK-LABEL: test_constraint_Ucj_w_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: //APP
+; CHECK-NEXT: add x0, x0, x12
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+ call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i8 %a)
+ ret void
+}
+
+define void @test_constraint_Ucj_x(i64 %a) {
+; CHECK-LABEL: test_constraint_Ucj_x:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x12, x0
+; CHECK-NEXT: //APP
+; CHECK-NEXT: add x0, x0, x12
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+ call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 %a)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/70970
More information about the cfe-commits
mailing list