[llvm] [AArch64] Fix range check for STGPostIndex (PR #117146)
Oliver Stannard via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 04:05:23 PST 2024
https://github.com/ostannard created https://github.com/llvm/llvm-project/pull/117146
When generating function epilogues using AArch64 stack tagging, we can fold an SP update into the tag-setting loop. The loop tags 32 bytes at a time using ST2G, so the final SP update might be done either by a post indexed STG which tags the final 16 bytes of the tagged region, or by an ADD/SUB instruction after the loop. However, we were only considering the range of the ADD/SUB instructions when deciding whether to do this, and the valid immediate range for STG is slightly lower when the offset is positive, because it is a signed immediate, and must include the extra 16 bytes being tagged.
>From da334d363abb1cc4f8592bb69b62c5987168f4f7 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 21 Nov 2024 11:36:54 +0000
Subject: [PATCH 1/3] Add test showing bug
---
.../AArch64/stack-tagging-epilogue-fold.mir | 71 +++++++++++++++++++
1 file changed, 71 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir
new file mode 100644
index 00000000000000..00c0bdaabaa4af
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir
@@ -0,0 +1,71 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -mattr=+mte -run-pass=prologepilog %s -o - | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+ target triple = "aarch64-arm-none-eabi"
+
+ define void @F76(i32 %0, ...) #0 {
+ ret void
+ }
+
+...
+---
+name: F76
+fixedStack:
+ - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
+ isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 4080, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -4080, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: default, offset: 0, size: 32, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -4112, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 2, name: '', type: default, offset: 0, size: 3888, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -8000, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 3, name: '', type: default, offset: 0, size: 56, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -8056, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 4, name: '', type: default, offset: 0, size: 128, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -8192, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+body: |
+ bb.0 (%ir-block.1):
+ liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+
+ ; CHECK-LABEL: name: F76
+ ; CHECK: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.5)
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2, 12
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8208
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: renamable $x8 = IRGstack $sp, $xzr
+ ; CHECK-NEXT: renamable $x9 = TAGPstack $x8, 2, renamable $x8, 1
+ ; CHECK-NEXT: renamable $x10 = COPY renamable $x9
+ ; CHECK-NEXT: dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv
+ ; CHECK-NEXT: ST2Gi renamable $x8, renamable $x8, 0
+ ; CHECK-NEXT: dead early-clobber $x8, early-clobber $sp = frame-destroy STGloop_wback 4096, $sp, implicit-def $nzcv
+ ; CHECK-NEXT: $sp = frame-destroy STGPostIndex $sp, $sp, 256
+ ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5)
+ ; CHECK-NEXT: RET_ReallyLR
+ /* Prologue */
+ renamable $x8 = IRGstack $sp, $xzr
+ renamable $x9 = TAGPstack %stack.0, 0, renamable $x8, 1
+ renamable $x10 = COPY renamable $x9
+ dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv
+ ST2Gi renamable $x8, renamable $x8, 0
+
+ /* Epilogue */
+ dead early-clobber renamable $x8, dead early-clobber renamable $x9 = STGloop 4080, %stack.0, implicit-def dead $nzcv
+ ST2Gi $sp, %stack.1, 0
+ RET_ReallyLR
+...
>From 7c959b33f1f8f0ccf5d33730f0bcef5fc3a9f45d Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 21 Nov 2024 11:38:27 +0000
Subject: [PATCH 2/3] [AArch64] Fix range check for STGPostIndex
When generating function epilogues using AArch64 stack tagging, we can
fold an SP update into the tag-setting loop. The loop tags 32 bytes at a
time using ST2G, so the final SP update might be done either by a post
indexed STG which tags the final 16 bytes of the tagged region, or by an
ADD/SUB instruction after the loop. However, we were only considering
the range of the ADD/SUB instructions when deciding whether to do this,
and the valid immediate range for STG is slightly lower when the offset
is positive.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 18 ++++++++++++++----
.../AArch64/stack-tagging-epilogue-fold.mir | 5 +++--
2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 216244950ba9ee..18acf62596c04c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4393,10 +4393,20 @@ bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
int64_t Offset = MI.getOperand(2).getImm() << Shift;
if (MI.getOpcode() == AArch64::SUBXri)
Offset = -Offset;
- int64_t AbsPostOffset = std::abs(Offset - Size);
- const int64_t kMaxOffset =
- 0xFFF; // Max encoding for unshifted ADDXri / SUBXri
- if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
+ int64_t PostOffset = Offset - Size;
+ // TagStoreEdit::emitLoop might emit either an ADD/SUB after the loop, or
+ // an STGPostIndex which does the last 16 bytes of tag write. Which one is
+ // chosen depends on the alignment of the loop size, but the difference
+ // between the valid ranges for the two instructions is small, so we
+ // conservatively assume that it could be either case here.
+ //
+ // Max offset of STGPostIndex, minus the 16 byte tag write folded into that
+ // instruction.
+ const int64_t kMaxOffset = 4080 - 16;
+ // Max offset of SUBXri.
+ const int64_t kMinOffset = -4095;
+ if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
+ PostOffset % 16 == 0) {
*TotalOffset = Offset;
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir
index 00c0bdaabaa4af..bcd716c5c8d6c0 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir
@@ -53,8 +53,9 @@ body: |
; CHECK-NEXT: renamable $x10 = COPY renamable $x9
; CHECK-NEXT: dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv
; CHECK-NEXT: ST2Gi renamable $x8, renamable $x8, 0
- ; CHECK-NEXT: dead early-clobber $x8, early-clobber $sp = frame-destroy STGloop_wback 4096, $sp, implicit-def $nzcv
- ; CHECK-NEXT: $sp = frame-destroy STGPostIndex $sp, $sp, 256
+ ; CHECK-NEXT: $x9 = ADDXri $sp, 0, 0
+ ; CHECK-NEXT: dead early-clobber $x8, dead early-clobber $x9 = STGloop_wback 4112, $x9, implicit-def $nzcv
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2, 12
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5)
; CHECK-NEXT: RET_ReallyLR
/* Prologue */
>From 9b95f600e56b283f7238fa8048d92a4cb80450db Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 21 Nov 2024 11:01:38 +0000
Subject: [PATCH 3/3] [AArch64] Add assertions and debug trace (NFC)
---
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 18acf62596c04c..e6057d8a028849 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4355,26 +4355,37 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
int64_t ExtraBaseRegUpdate =
FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
+ LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize
+ << ", Size=" << Size
+ << ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
+ << ", FrameRegUpdate=" << FrameRegUpdate
+ << ", FrameRegOffset.getFixed()="
+ << FrameRegOffset.getFixed() << "\n");
if (LoopSize < Size) {
assert(FrameRegUpdate);
assert(Size - LoopSize == 16);
// Tag 16 more bytes at BaseReg and update BaseReg.
+ int64_t STGOffset = ExtraBaseRegUpdate + 16;
+ assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
+ "STG immediate out of range");
BuildMI(*MBB, InsertI, DL,
TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
.addDef(BaseReg)
.addReg(BaseReg)
.addReg(BaseReg)
- .addImm(1 + ExtraBaseRegUpdate / 16)
+ .addImm(STGOffset / 16)
.setMemRefs(CombinedMemRefs)
.setMIFlags(FrameRegUpdateFlags);
} else if (ExtraBaseRegUpdate) {
// Update BaseReg.
+ int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
+ assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range");
BuildMI(
*MBB, InsertI, DL,
TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
.addDef(BaseReg)
.addReg(BaseReg)
- .addImm(std::abs(ExtraBaseRegUpdate))
+ .addImm(AddSubOffset)
.addImm(0)
.setMIFlags(FrameRegUpdateFlags);
}
More information about the llvm-commits
mailing list