[llvm] db0e376 - [AArch64] Fix failure with inline asm and svcount (#112537)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 24 09:41:11 PDT 2024
Author: Sander de Smalen
Date: 2024-10-24T17:41:07+01:00
New Revision: db0e3760442324d48adb68228d1939667807b2b8
URL: https://github.com/llvm/llvm-project/commit/db0e3760442324d48adb68228d1939667807b2b8
DIFF: https://github.com/llvm/llvm-project/commit/db0e3760442324d48adb68228d1939667807b2b8.diff
LOG: [AArch64] Fix failure with inline asm and svcount (#112537)
This fixes an issue where the compiler runs into an assertion failure
for the following example:
  register svcount_t pred asm("pn8") = svptrue_c8();
  asm("ld1w { z0.s, z4.s, z8.s, z12.s }, %[pred]/z, [x0]\n"
      :
      : [pred] "Uph" (pred)
      : "memory", "cc");
Here the register constraint that ends up in the LLVM IR is "{pn8}", but
the code in `TargetLowering::getRegForInlineAsmConstraint` that parses
that string follows a path where it queries a suitable register class
for this register (the PPRorPNR register class) and then picks
`nxv16i1` as a suitable type for that class. Each of these choices is
correct in isolation, but the combined result isn't, because the type
should be `aarch64svcount`.
This then results in a failure later on, in CopyToReg in
SelectionDAGBuilder.cpp, because the type of the actual value and the
type computed from the constraint don't match.

This PR pre-empts the issue by parsing the explicit predicate register
from the constraint string and returning the correct register class
directly.
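
To make the mismatch concrete: by the time the example above reaches
instruction selection, it looks roughly like the IR below (a
hand-written sketch, modelled on the tests added in this patch). The
value passed for the "{pn8}" constraint has type
target("aarch64.svcount"), whereas the type computed from the
constraint was nxv16i1:

  %pred = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
  call void asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $0/z, [x0]",
      "{pn8},~{memory},~{cc}"(target("aarch64.svcount") %pred)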
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9adb11292376ce..5a848ada9dd8ee 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11867,6 +11867,36 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
 enum class PredicateConstraint { Uph, Upl, Upa };
+// Returns a {Reg, RegisterClass} tuple if the constraint is
+// a specific predicate register.
+//
+// For some constraint like "{pn3}" the default path in
+// TargetLowering::getRegForInlineAsmConstraint() leads it to determine that a
+// suitable register class for this register is "PPRorPNR", after which it
+// determines that nxv16i1 is an appropriate type for the constraint, which is
+// not what we want. The code here pre-empts this by matching the register
+// explicitly.
+static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
+parsePredicateRegAsConstraint(StringRef Constraint) {
+  if (!Constraint.starts_with('{') || !Constraint.ends_with('}') ||
+      Constraint[1] != 'p')
+    return std::nullopt;
+
+  Constraint = Constraint.substr(2, Constraint.size() - 3);
+  bool IsPredicateAsCount = Constraint.starts_with("n");
+  if (IsPredicateAsCount)
+    Constraint = Constraint.drop_front(1);
+
+  unsigned V;
+  if (Constraint.getAsInteger(10, V) || V > 31)
+    return std::nullopt;
+
+  if (IsPredicateAsCount)
+    return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
+  else
+    return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
+}
+
 static std::optional<PredicateConstraint>
 parsePredicateConstraint(StringRef Constraint) {
   return StringSwitch<std::optional<PredicateConstraint>>(Constraint)
@@ -12114,6 +12144,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
       break;
     }
   } else {
+    if (const auto P = parsePredicateRegAsConstraint(Constraint))
+      return *P;
     if (const auto PC = parsePredicateConstraint(Constraint))
       if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
         return std::make_pair(0U, RegClass);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
index 9f8897575b3d58..ff66206228a4aa 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
@@ -119,3 +119,67 @@ define <vscale x 8 x half> @test_svfadd_f16_Uph_constraint(<vscale x 16 x i1> %P
   %1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Uph,w,w"(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
   ret <vscale x 8 x half> %1
 }
+
+define void @explicit_p0(ptr %p) {
+  ; CHECK-LABEL: name: explicit_p0
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
+  ; CHECK-NEXT:   $p0 = COPY [[PTRUE_B]]
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
+  ; CHECK-NEXT:   INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $p0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+  ; CHECK-NEXT:   RET_ReallyLR
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.b8(i32 31)
+  %2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p0},0"(<vscale x 16 x i1> %1, ptr %p)
+  ret void
+}
+
+define void @explicit_p8_invalid(ptr %p) {
+  ; CHECK-LABEL: name: explicit_p8_invalid
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
+  ; CHECK-NEXT:   $p8 = COPY [[PTRUE_B]]
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
+  ; CHECK-NEXT:   INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $p8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+  ; CHECK-NEXT:   RET_ReallyLR
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.b8(i32 31)
+  %2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p8},0"(<vscale x 16 x i1> %1, ptr %p)
+  ret void
+}
+
+define void @explicit_pn8(ptr %p) {
+  ; CHECK-LABEL: name: explicit_pn8
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg
+  ; CHECK-NEXT:   $pn8 = COPY [[PTRUE_C_B]]
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
+  ; CHECK-NEXT:   INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+  ; CHECK-NEXT:   RET_ReallyLR
+  %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+  %2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn8},0"(target("aarch64.svcount") %1, ptr %p)
+  ret void
+}
+
+define void @explicit_pn0_invalid(ptr %p) {
+  ; CHECK-LABEL: name: explicit_pn0_invalid
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg
+  ; CHECK-NEXT:   $pn0 = COPY [[PTRUE_C_B]]
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
+  ; CHECK-NEXT:   INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+  ; CHECK-NEXT:   RET_ReallyLR
+  %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+  %2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn0},0"(target("aarch64.svcount") %1, ptr %p)
+  ret void
+}
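
The CHECK lines in these tests are MIR-level checks. Assuming the
file's existing RUN line enables +sve2p1 (the svcount tests need it),
the output they match can be reproduced with an invocation along the
lines of:

  llc -mtriple=aarch64 -mattr=+sve2p1 -stop-after=finalize-isel \
      < llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll \
    | FileCheck llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll

where -stop-after=finalize-isel prints the machine IR right after
instruction selection, before register allocation.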