[llvm] [AArch64] Fix range check for STGPostIndex (PR #117146)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 04:05:48 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Oliver Stannard (ostannard)
<details>
<summary>Changes</summary>
When generating function epilogues using AArch64 stack tagging, we can fold an SP update into the tag-setting loop. The loop tags 32 bytes at a time using ST2G, so the final SP update might be done either by a post indexed STG which tags the final 16 bytes of the tagged region, or by an ADD/SUB instruction after the loop. However, we were only considering the range of the ADD/SUB instructions when deciding whether to do this, and the valid immediate range for STG is slightly lower when the offset is positive, because it is a signed immediate, and must include the extra 16 bytes being tagged.
---
Full diff: https://github.com/llvm/llvm-project/pull/117146.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+27-6)
- (added) llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir (+72)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 216244950ba9ee..e6057d8a028849 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4355,26 +4355,37 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
int64_t ExtraBaseRegUpdate =
FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
+ LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize
+ << ", Size=" << Size
+ << ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
+ << ", FrameRegUpdate=" << FrameRegUpdate
+ << ", FrameRegOffset.getFixed()="
+ << FrameRegOffset.getFixed() << "\n");
if (LoopSize < Size) {
assert(FrameRegUpdate);
assert(Size - LoopSize == 16);
// Tag 16 more bytes at BaseReg and update BaseReg.
+ int64_t STGOffset = ExtraBaseRegUpdate + 16;
+ assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
+ "STG immediate out of range");
BuildMI(*MBB, InsertI, DL,
TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
.addDef(BaseReg)
.addReg(BaseReg)
.addReg(BaseReg)
- .addImm(1 + ExtraBaseRegUpdate / 16)
+ .addImm(STGOffset / 16)
.setMemRefs(CombinedMemRefs)
.setMIFlags(FrameRegUpdateFlags);
} else if (ExtraBaseRegUpdate) {
// Update BaseReg.
+ int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
+ assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range");
BuildMI(
*MBB, InsertI, DL,
TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
.addDef(BaseReg)
.addReg(BaseReg)
- .addImm(std::abs(ExtraBaseRegUpdate))
+ .addImm(AddSubOffset)
.addImm(0)
.setMIFlags(FrameRegUpdateFlags);
}
@@ -4393,10 +4404,20 @@ bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
int64_t Offset = MI.getOperand(2).getImm() << Shift;
if (MI.getOpcode() == AArch64::SUBXri)
Offset = -Offset;
- int64_t AbsPostOffset = std::abs(Offset - Size);
- const int64_t kMaxOffset =
- 0xFFF; // Max encoding for unshifted ADDXri / SUBXri
- if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
+ int64_t PostOffset = Offset - Size;
+ // TagStoreEdit::emitLoop might emit either an ADD/SUB after the loop, or
+ // an STGPostIndex which does the last 16 bytes of tag write. Which one is
+ // chosen depends on the alignment of the loop size, but the difference
+ // between the valid ranges for the two instructions is small, so we
+ // conservatively assume that it could be either case here.
+ //
+ // Max offset of STGPostIndex, minus the 16 byte tag write folded into that
+ // instruction.
+ const int64_t kMaxOffset = 4080 - 16;
+ // Max offset of SUBXri.
+ const int64_t kMinOffset = -4095;
+ if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
+ PostOffset % 16 == 0) {
*TotalOffset = Offset;
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir
new file mode 100644
index 00000000000000..bcd716c5c8d6c0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir
@@ -0,0 +1,72 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -mattr=+mte -run-pass=prologepilog %s -o - | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+ target triple = "aarch64-arm-none-eabi"
+
+ define void @F76(i32 %0, ...) #0 {
+ ret void
+ }
+
+...
+---
+name: F76
+fixedStack:
+ - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
+ isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 4080, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -4080, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: default, offset: 0, size: 32, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -4112, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 2, name: '', type: default, offset: 0, size: 3888, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -8000, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 3, name: '', type: default, offset: 0, size: 56, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -8056, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 4, name: '', type: default, offset: 0, size: 128, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -8192, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+body: |
+ bb.0 (%ir-block.1):
+ liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+
+ ; CHECK-LABEL: name: F76
+ ; CHECK: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.5)
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2, 12
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8208
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: renamable $x8 = IRGstack $sp, $xzr
+ ; CHECK-NEXT: renamable $x9 = TAGPstack $x8, 2, renamable $x8, 1
+ ; CHECK-NEXT: renamable $x10 = COPY renamable $x9
+ ; CHECK-NEXT: dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv
+ ; CHECK-NEXT: ST2Gi renamable $x8, renamable $x8, 0
+ ; CHECK-NEXT: $x9 = ADDXri $sp, 0, 0
+ ; CHECK-NEXT: dead early-clobber $x8, dead early-clobber $x9 = STGloop_wback 4112, $x9, implicit-def $nzcv
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2, 12
+ ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5)
+ ; CHECK-NEXT: RET_ReallyLR
+ /* Prologue */
+ renamable $x8 = IRGstack $sp, $xzr
+ renamable $x9 = TAGPstack %stack.0, 0, renamable $x8, 1
+ renamable $x10 = COPY renamable $x9
+ dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv
+ ST2Gi renamable $x8, renamable $x8, 0
+
+ /* Epilogue */
+ dead early-clobber renamable $x8, dead early-clobber renamable $x9 = STGloop 4080, %stack.0, implicit-def dead $nzcv
+ ST2Gi $sp, %stack.1, 0
+ RET_ReallyLR
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/117146
More information about the llvm-commits
mailing list