[llvm] [AArch64] Disable red-zone when lowering Q-reg copy through memory. (PR #94962)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 10 10:03:42 PDT 2024
https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/94962
>From 8e2a8c203a2821288d543da119f48751b3c8ad02 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 10 Jun 2024 11:06:14 +0000
Subject: [PATCH 1/2] [AArch64] Disable red-zone when lowering Q-reg copy
through memory.
This was pointed out in PR #93940.
---
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index cd532671f5018..65e3bbf4e9c35 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -431,8 +431,15 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
uint64_t NumBytes = AFI->getLocalStackSize();
+ // If neither NEON nor SVE is available, a COPY from one Q-reg to
+ // another requires a spill -> reload sequence. We can do that
+ // using a pre-decrementing store/post-decrementing load, but
+ // if we do so, we can't use the Red Zone.
+ bool LowerQRegCopyThroughMem =
+ !Subtarget.isNeonAvailable() && !Subtarget.hasSVE();
+
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
- getSVEStackSize(MF));
+ getSVEStackSize(MF) || LowerQRegCopyThroughMem);
}
/// hasFP - Return true if the specified function should have a dedicated frame
>From d4f576a1b5497eb108bf9104447fc3d34e8ccb9a Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 10 Jun 2024 15:58:42 +0000
Subject: [PATCH 2/2] Add hasFPARMv8 to avoid impact on soft-fp code
---
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 5 +++--
llvm/test/CodeGen/AArch64/arm64-redzone.ll | 13 +++++++++++++
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 65e3bbf4e9c35..cf617c7e92a70 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -435,8 +435,9 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
// another requires a spill -> reload sequence. We can do that
// using a pre-decrementing store/post-decrementing load, but
// if we do so, we can't use the Red Zone.
- bool LowerQRegCopyThroughMem =
- !Subtarget.isNeonAvailable() && !Subtarget.hasSVE();
+ bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
+ !Subtarget.isNeonAvailable() &&
+ !Subtarget.hasSVE();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
getSVEStackSize(MF) || LowerQRegCopyThroughMem);
diff --git a/llvm/test/CodeGen/AArch64/arm64-redzone.ll b/llvm/test/CodeGen/AArch64/arm64-redzone.ll
index fe30a1a98521e..d001bc2a8dbe4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-redzone.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-redzone.ll
@@ -16,3 +16,16 @@ define i32 @foo(i32 %a, i32 %b) nounwind ssp {
%tmp2 = load i32, ptr %x, align 4
ret i32 %tmp2
}
+
+; We disable red-zone if neither NEON nor SVE is available, because copies
+; of Q-regs then require a spill/fill and dynamic stack allocation. But we
+; only need to do this when FP registers are enabled.
+define void @bar(fp128 %f) "target-features"="-fp-armv8" {
+; CHECK-LABEL: bar:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x0, x1, [sp, #-16]
+; CHECK-NEXT: ret
+ %ptr = alloca fp128
+ store fp128 %f, ptr %ptr
+ ret void
+}
More information about the llvm-commits
mailing list