[llvm] 7d07405 - [AArch64] Prefer prologues with sp adjustments merged into stp/ldp for WinCFI, if optimizing for size

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 3 11:38:09 PDT 2020


Author: Martin Storsjö
Date: 2020-10-03T21:37:22+03:00
New Revision: 7d07405761aec8434a0cdb1c5644823a394f7def

URL: https://github.com/llvm/llvm-project/commit/7d07405761aec8434a0cdb1c5644823a394f7def
DIFF: https://github.com/llvm/llvm-project/commit/7d07405761aec8434a0cdb1c5644823a394f7def.diff

LOG: [AArch64] Prefer prologues with sp adjustments merged into stp/ldp for WinCFI, if optimizing for size

This makes the prologue match the windows canonical layout, for
cases without a frame pointer.

This can potentially be slower (a longer dependency chain of the
sp register, and potentially one more arithmetic operation on some
cores), but gives notable size improvements.

The previous two commits shrink a 166 KB xdata section by 49 KB,
and if the change from this commit is enabled, it shrinks the xdata
section by another 25 KB.

In total, since the start of the recent arm64 unwind info cleanups
and optimizations (since before commit 37ef743cbf3), the xdata+pdata
sections of the same test DLL have shrunk from 407 KB in total
originally, to 163 KB now.

Differential Revision: https://reviews.llvm.org/D88701

Added: 
    llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d33ebdd330c0..0d52b00d54ba 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -579,6 +579,12 @@ static bool windowsRequiresStackProbe(MachineFunction &MF,
          !F.hasFnAttribute("no-stack-arg-probe");
 }
 
+static bool needsWinCFI(const MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+         F.needsUnwindTableEntry();
+}
+
 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
     MachineFunction &MF, uint64_t StackBumpBytes) const {
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -589,6 +595,18 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
   if (AFI->getLocalStackSize() == 0)
     return false;
 
+  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
+  // (to force a stp with predecrement) to match the packed unwind format,
+  // provided that there actually are any callee saved registers to merge the
+  // decrement with.
+  // This is potentially marginally slower, but allows using the packed
+  // unwind format for functions that both have a local area and callee saved
+  // registers. Using the packed unwind format notably reduces the size of
+  // the unwind info.
+  if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
+      MF.getFunction().hasOptSize())
+    return false;
+
   // 512 is the maximum immediate for stp/ldp that will be used for
   // callee-save save/restores
   if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
@@ -982,12 +1000,6 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB,
   //
 }
 
-static bool needsWinCFI(const MachineFunction &MF) {
-  const Function &F = MF.getFunction();
-  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
-         F.needsUnwindTableEntry();
-}
-
 static bool isTargetWindows(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
 }

diff  --git a/llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir b/llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir
new file mode 100644
index 000000000000..1bed8f6b547a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir
@@ -0,0 +1,70 @@
+# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
+# RUN:   -stop-after=prologepilog | FileCheck %s
+
+# Check that the callee-saved registers are saved starting with a STP
+# with predecrement, followed by a separate stack adjustment later,
+# if the optsize attribute is set.
+
+# CHECK:      early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -2
+# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -16
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup SEH_StackAlloc 16
+# CHECK-NEXT: frame-setup SEH_PrologEnd
+
+--- |
+
+  define dso_local i32 @func(i32 %a) optsize { ret i32 %a }
+
+...
+---
+name:            func
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  4
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -4, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x19, $x20
+
+    renamable $x8 = ADDXri %stack.0, 0, 0
+    $x19 = ADDXrr $x0, $x8
+    $x20 = ADDXrr $x19, $x0
+    $x0 = ADDXrr $x0, killed $x20
+
+    RET_ReallyLR
+
+...


        


More information about the llvm-commits mailing list