[llvm] 1308bb9 - [MC] [Win64EH] Write packed ARM64 epilogues if possible
Martin Storsjö via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 11 00:34:33 PDT 2020
Author: Martin Storsjö
Date: 2020-09-11T10:31:04+03:00
New Revision: 1308bb99e06752ab0b5175c92da31083f91af921
URL: https://github.com/llvm/llvm-project/commit/1308bb99e06752ab0b5175c92da31083f91af921
DIFF: https://github.com/llvm/llvm-project/commit/1308bb99e06752ab0b5175c92da31083f91af921.diff
LOG: [MC] [Win64EH] Write packed ARM64 epilogues if possible
This gives a pretty substantial size reduction; for a 6.5 MB
DLL with 300 KB .xdata, the .xdata shrinks by 66 KB.
Differential Revision: https://reviews.llvm.org/D87369
Added:
llvm/test/MC/AArch64/seh-packed-epilog.s
Modified:
llvm/include/llvm/MC/MCWinEH.h
llvm/lib/MC/MCWin64EH.cpp
llvm/test/CodeGen/AArch64/wineh3.mir
llvm/test/CodeGen/AArch64/wineh6.mir
llvm/test/CodeGen/AArch64/wineh7.mir
llvm/test/MC/AArch64/seh.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCWinEH.h b/llvm/include/llvm/MC/MCWinEH.h
index 53cffccce8c1..f05f5f1641cd 100644
--- a/llvm/include/llvm/MC/MCWinEH.h
+++ b/llvm/include/llvm/MC/MCWinEH.h
@@ -26,6 +26,14 @@ struct Instruction {
Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off)
: Label(L), Offset(Off), Register(Reg), Operation(Op) {}
+
+ bool operator==(const Instruction &I) const {
+ // Check whether two instructions refer to the same operation
+ // applied at a different spot (i.e. pointing at a different label).
+ return Offset == I.Offset && Register == I.Register &&
+ Operation == I.Operation;
+ }
+ bool operator!=(const Instruction &I) const { return !(*this == I); }
};
struct FrameInfo {
diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index e9ab88234ad3..a585b5082837 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -264,8 +264,7 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
return value;
}
-static uint32_t
-ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
+static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
uint32_t Count = 0;
for (const auto &I : Insns) {
switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
@@ -553,18 +552,23 @@ static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
// Convert 2-byte opcodes into equivalent 1-byte ones.
if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == 29) {
Inst.Operation = Win64EH::UOP_SaveFPLR;
+ Inst.Register = -1;
} else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
Inst.Register == 29) {
Inst.Operation = Win64EH::UOP_SaveFPLRX;
+ Inst.Register = -1;
} else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
Inst.Register == 19 && Inst.Offset <= 248) {
Inst.Operation = Win64EH::UOP_SaveR19R20X;
+ Inst.Register = -1;
} else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) {
Inst.Operation = Win64EH::UOP_SetFP;
} else if (Inst.Operation == Win64EH::UOP_SaveRegP &&
Inst.Register == PrevRegister + 2 &&
Inst.Offset == PrevOffset + 16) {
Inst.Operation = Win64EH::UOP_SaveNext;
+ Inst.Register = -1;
+ Inst.Offset = 0;
// Intentionally not creating UOP_SaveNext for float register pairs,
// as current versions of Windows (up to at least 20.04) is buggy
// regarding SaveNext for float pairs.
@@ -601,6 +605,47 @@ static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
}
}
+// Decide whether the function's epilog can be described in "packed" form,
+// i.e. by reusing unwind codes that are already emitted for the prolog
+// instead of emitting a separate epilog scope.
+//
+// streamer        - used to resolve the distance between labels.
+// info            - frame description; on success its EpilogMap is cleared so
+//                   that the caller emits no separate epilog scopes.
+// PrologCodeBytes - size in bytes of the prolog's unwind codes.
+//
+// Returns -1 if the epilog cannot be packed. Otherwise returns the number of
+// unwind code bytes used by the prolog instructions that the epilog does not
+// share (0 when the epilog matches the whole prolog).
+static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
+                             int PrologCodeBytes) {
+  // Can only pack if there's one single epilog
+  if (info->EpilogMap.size() != 1)
+    return -1;
+
+  const std::vector<WinEH::Instruction> &Epilog =
+      info->EpilogMap.begin()->second;
+  const size_t EpilogSize = Epilog.size();
+  const size_t PrologSize = info->Instructions.size();
+
+  // Can pack if the epilog is a subset of the prolog but not vice versa
+  if (EpilogSize > PrologSize)
+    return -1;
+
+  // Check that the epilog actually is a perfect match for the end (backwards)
+  // of the prolog: epilog instruction N must equal prolog instruction
+  // EpilogSize - 1 - N (labels are ignored by the comparison).
+  for (size_t I = 0; I < EpilogSize; ++I) {
+    if (info->Instructions[I] != Epilog[EpilogSize - 1 - I])
+      return -1;
+  }
+
+  // Check that the epilog actually is at the very end of the function,
+  // otherwise it can't be packed.
+  uint32_t DistanceFromEnd = static_cast<uint32_t>(GetAbsDifference(
+      streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first));
+  if (DistanceFromEnd / 4 != EpilogSize)
+    return -1;
+
+  // Count the unwind code bytes of the prolog instructions that are not
+  // shared with the epilog. When the epilog covers the entire prolog there
+  // are none; handle that case explicitly rather than forming
+  // &Instructions[size()], which indexes past the end of the vector.
+  int Offset = 0;
+  if (EpilogSize < PrologSize)
+    Offset = ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction>(
+        info->Instructions.data() + EpilogSize, PrologSize - EpilogSize));
+
+  // Check that the offset and prolog size fits in the first word; it's
+  // unclear whether the epilog count in the extension word can be taken
+  // as packed epilog offset.
+  if (Offset > 31 || PrologCodeBytes > 124)
+    return -1;
+
+  info->EpilogMap.clear();
+  return Offset;
+}
+
// Populate the .xdata section. The format of .xdata on ARM64 is documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@@ -679,6 +724,8 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
uint32_t TotalCodeBytes = PrologCodeBytes;
+ int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes);
+
// Process epilogs.
MapVector<MCSymbol *, uint32_t> EpilogInfo;
// Epilogs processed so far.
@@ -711,15 +758,17 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
uint32_t CodeWordsMod = TotalCodeBytes % 4;
if (CodeWordsMod)
CodeWords++;
- uint32_t EpilogCount = info->EpilogMap.size();
+ uint32_t EpilogCount =
+ PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size();
bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124;
if (!ExtensionWord) {
row1 |= (EpilogCount & 0x1F) << 22;
row1 |= (CodeWords & 0x1F) << 27;
}
- // E is always 0 right now, TODO: packed epilog setup
if (info->HandlesExceptions) // X
row1 |= 1 << 20;
+ if (PackedEpilogOffset >= 0) // E
+ row1 |= 1 << 21;
row1 |= FuncLength & 0x3FFFF;
streamer.emitInt32(row1);
diff --git a/llvm/test/CodeGen/AArch64/wineh3.mir b/llvm/test/CodeGen/AArch64/wineh3.mir
index 6cbe7f42dc5e..d1ffa4aedc08 100644
--- a/llvm/test/CodeGen/AArch64/wineh3.mir
+++ b/llvm/test/CodeGen/AArch64/wineh3.mir
@@ -8,9 +8,9 @@
# CHECK-NEXT: FunctionLength: 124
# CHECK-NEXT: Version: 0
# CHECK-NEXT: ExceptionData: No
-# CHECK-NEXT: EpiloguePacked: No
-# CHECK-NEXT: EpilogueScopes: 1
-# CHECK-NEXT: ByteCodeLength: 32
+# CHECK-NEXT: EpiloguePacked: Yes
+# CHECK-NEXT: EpilogueOffset: 0
+# CHECK-NEXT: ByteCodeLength: 16
# CHECK-NEXT: Prologue [
# CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96]
# CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80]
@@ -21,22 +21,6 @@
# CHECK-NEXT: 0xda8d ; stp d10, d11, [sp, #-112]!
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
-# CHECK-NEXT: EpilogueScopes [
-# CHECK-NEXT: EpilogueScope {
-# CHECK-NEXT: StartOffset: 23
-# CHECK-NEXT: EpilogueStartIndex: 15
-# CHECK-NEXT: Opcodes [
-# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96]
-# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80]
-# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64]
-# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48]
-# CHECK-NEXT: 0xca04 ; ldp x27, x28, [sp, #32]
-# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16]
-# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112
-# CHECK-NEXT: 0xe4 ; end
-# CHECK-NEXT: ]
-# CHECK-NEXT: }
-# CHECK-NEXT: ]
# CHECK-NEXT: }
...
---
diff --git a/llvm/test/CodeGen/AArch64/wineh6.mir b/llvm/test/CodeGen/AArch64/wineh6.mir
index 95a11aa3c4e8..e7592bd71146 100644
--- a/llvm/test/CodeGen/AArch64/wineh6.mir
+++ b/llvm/test/CodeGen/AArch64/wineh6.mir
@@ -6,25 +6,19 @@
# CHECK-NEXT: FunctionLength: 92
# CHECK-NEXT: Version: 0
# CHECK-NEXT: ExceptionData: No
-# CHECK-NEXT: EpiloguePacked: No
-# CHECK-NEXT: EpilogueScopes: 1
-# CHECK-NEXT: ByteCodeLength: 8
+# CHECK-NEXT: EpiloguePacked: Yes
+# CHECK-NEXT: EpilogueOffset: 1
+# CHECK-NEXT: ByteCodeLength: 4
# CHECK-NEXT: Prologue [
# CHECK-NEXT: 0x02 ; sub sp, #32
# CHECK-NEXT: 0xe1 ; mov fp, sp
# CHECK-NEXT: 0x81 ; stp x29, x30, [sp, #-16]!
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
-# CHECK-NEXT: EpilogueScopes [
-# CHECK-NEXT: EpilogueScope {
-# CHECK-NEXT: StartOffset: 20
-# CHECK-NEXT: EpilogueStartIndex: 4
-# CHECK-NEXT: Opcodes [
-# CHECK-NEXT: 0xe1 ; mov sp, fp
-# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16
-# CHECK-NEXT: 0xe4 ; end
-# CHECK-NEXT: ]
-# CHECK-NEXT: }
+# CHECK-NEXT: Epilogue [
+# CHECK-NEXT: 0xe1 ; mov sp, fp
+# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16
+# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: }
...
diff --git a/llvm/test/CodeGen/AArch64/wineh7.mir b/llvm/test/CodeGen/AArch64/wineh7.mir
index da64b3c002f3..6bf06d80861a 100644
--- a/llvm/test/CodeGen/AArch64/wineh7.mir
+++ b/llvm/test/CodeGen/AArch64/wineh7.mir
@@ -6,9 +6,9 @@
# CHECK-NEXT: FunctionLength: 72
# CHECK-NEXT: Version: 0
# CHECK-NEXT: ExceptionData: No
-# CHECK-NEXT: EpiloguePacked: No
-# CHECK-NEXT: EpilogueScopes: 1
-# CHECK-NEXT: ByteCodeLength: 16
+# CHECK-NEXT: EpiloguePacked: Yes
+# CHECK-NEXT: EpilogueOffset: 0
+# CHECK-NEXT: ByteCodeLength: 8
# CHECK-NEXT: Prologue [
# CHECK-NEXT: 0xe204 ; add fp, sp, #32
# CHECK-NEXT: 0x44 ; stp x29, x30, [sp, #32]
@@ -16,19 +16,6 @@
# CHECK-NEXT: 0xcc85 ; stp x21, x22, [sp, #-48]!
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
-# CHECK-NEXT: EpilogueScopes [
-# CHECK-NEXT: EpilogueScope {
-# CHECK-NEXT: StartOffset: 13
-# CHECK-NEXT: EpilogueStartIndex: 8
-# CHECK-NEXT: Opcodes [
-# CHECK-NEXT: 0xe204 ; sub sp, fp, #32
-# CHECK-NEXT: 0x44 ; ldp x29, x30, [sp, #32]
-# CHECK-NEXT: 0xc802 ; ldp x19, x20, [sp, #16]
-# CHECK-NEXT: 0xcc85 ; ldp x21, x22, [sp], #48
-# CHECK-NEXT: 0xe4 ; end
-# CHECK-NEXT: ]
-# CHECK-NEXT: }
-# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: }
diff --git a/llvm/test/MC/AArch64/seh-packed-epilog.s b/llvm/test/MC/AArch64/seh-packed-epilog.s
new file mode 100644
index 000000000000..f9978ea7a113
--- /dev/null
+++ b/llvm/test/MC/AArch64/seh-packed-epilog.s
@@ -0,0 +1,187 @@
+// This test checks that the epilogue is packed where possible.
+
+// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s -o %t.o
+// RUN: llvm-readobj -u %t.o | FileCheck %s
+
+// CHECK: UnwindInformation [
+// CHECK-NEXT: RuntimeFunction {
+// CHECK-NEXT: Function: func
+// CHECK-NEXT: ExceptionRecord: .xdata
+// CHECK-NEXT: ExceptionData {
+// CHECK-NEXT: FunctionLength:
+// CHECK-NEXT: Version:
+// CHECK-NEXT: ExceptionData:
+// CHECK-NEXT: EpiloguePacked: Yes
+// CHECK-NEXT: EpilogueOffset: 2
+// CHECK-NEXT: ByteCodeLength:
+// CHECK-NEXT: Prologue [
+// CHECK-NEXT: 0xdc04 ; str d8, [sp, #32]
+// CHECK-NEXT: 0xe1 ; mov fp, sp
+// CHECK-NEXT: 0x42 ; stp x29, x30, [sp, #16]
+// CHECK-NEXT: 0x85 ; stp x29, x30, [sp, #-48]!
+// CHECK-NEXT: 0xe6 ; save next
+// CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]!
+// CHECK-NEXT: 0xc842 ; stp x20, x21, [sp, #16]
+// CHECK-NEXT: 0x03 ; sub sp, #48
+// CHECK-NEXT: 0xe4 ; end
+// CHECK-NEXT: ]
+// CHECK-NEXT: Epilogue [
+// CHECK-NEXT: 0xe1 ; mov sp, fp
+// CHECK-NEXT: 0x42 ; ldp x29, x30, [sp, #16]
+// CHECK-NEXT: 0x85 ; ldp x29, x30, [sp], #48
+// CHECK-NEXT: 0xe6 ; restore next
+// CHECK-NEXT: 0x24 ; ldp x19, x20, [sp], #32
+// CHECK-NEXT: 0xc842 ; ldp x20, x21, [sp, #16]
+// CHECK-NEXT: 0x03 ; add sp, #48
+// CHECK-NEXT: 0xe4 ; end
+// CHECK-NEXT: ]
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK: RuntimeFunction {
+// CHECK-NEXT: Function: packed2
+// CHECK-NEXT: ExceptionRecord:
+// CHECK-NEXT: ExceptionData {
+// CHECK: ExceptionData:
+// CHECK-NEXT: EpiloguePacked: Yes
+// CHECK: RuntimeFunction {
+// CHECK-NEXT: Function: nonpacked1
+// CHECK-NEXT: ExceptionRecord:
+// CHECK-NEXT: ExceptionData {
+// CHECK: ExceptionData:
+// CHECK-NEXT: EpiloguePacked: No
+// CHECK: RuntimeFunction {
+// CHECK-NEXT: Function: nonpacked2
+// CHECK-NEXT: ExceptionRecord:
+// CHECK-NEXT: ExceptionData {
+// CHECK: ExceptionData:
+// CHECK-NEXT: EpiloguePacked: No
+// CHECK: RuntimeFunction {
+// CHECK-NEXT: Function: nonpacked3
+// CHECK-NEXT: ExceptionRecord:
+// CHECK-NEXT: ExceptionData {
+// CHECK: ExceptionData:
+// CHECK-NEXT: EpiloguePacked: No
+
+ .text
+ .globl func
+ .seh_proc func
+func:
+ sub sp, sp, #48
+ .seh_stackalloc 48
+ // Check that canonical opcode forms (r19r20_x, fplr, fplr_x, save_next,
+ // set_fp) are treated as a match even if one (in prologue or epilogue)
+ // was simplified from the more generic opcodes.
+ stp x20, x21, [sp, #16]
+ .seh_save_regp x20, 16
+ stp x19, x20, [sp, #-32]!
+ .seh_save_r19r20_x 32
+ stp x21, x22, [sp, #16]
+ .seh_save_regp x21, 16
+ stp x29, x30, [sp, #-48]!
+ .seh_save_regp_x x29, 48
+ stp x29, x30, [sp, #16]
+ .seh_save_regp x29, 16
+ add x29, sp, #0
+ .seh_add_fp 0
+ str d8, [sp, #32]
+ .seh_save_freg d8, 32
+ .seh_endprologue
+
+ nop
+
+ .seh_startepilogue
+ mov sp, x29
+ .seh_set_fp
+ ldp x29, x30, [sp, #16]
+ .seh_save_fplr 16
+ ldp x29, x30, [sp, #-48]!
+ .seh_save_fplr_x 48
+ ldp x21, x22, [sp, #16]
+ .seh_save_next
+ ldp x19, x20, [sp], #32
+ .seh_save_regp_x x19, 32
+ ldp x20, x21, [sp, #16]
+ .seh_save_regp x20, 16
+ add sp, sp, #48
+ .seh_stackalloc 48
+ .seh_endepilogue
+ ret
+ .seh_endproc
+
+
+ // Test a perfectly matching epilog with no offset.
+ .seh_proc packed2
+packed2:
+ sub sp, sp, #48
+ .seh_stackalloc 48
+ stp x29, lr, [sp, #-32]!
+ .seh_save_fplr_x 32
+ .seh_endprologue
+ nop
+ .seh_startepilogue
+ ldp x29, lr, [sp], #32
+ .seh_save_fplr_x 32
+ add sp, sp, #48
+ .seh_stackalloc 48
+ .seh_endepilogue
+ ret
+ .seh_endproc
+
+
+ .seh_proc nonpacked1
+nonpacked1:
+ sub sp, sp, #48
+ .seh_stackalloc 48
+ .seh_endprologue
+
+ nop
+ .seh_startepilogue
+ add sp, sp, #48
+ .seh_stackalloc 48
+ .seh_endepilogue
+ // This epilogue isn't packed with the prologue, as it doesn't align with
+ // the end of the function (one extra nop before the ret).
+ nop
+ ret
+ .seh_endproc
+
+
+ .seh_proc nonpacked2
+nonpacked2:
+ sub sp, sp, #48
+ .seh_stackalloc 48
+ sub sp, sp, #32
+ .seh_stackalloc 32
+ .seh_endprologue
+
+ nop
+ .seh_startepilogue
+ // Not packed; the epilogue mismatches at the second opcode.
+ add sp, sp, #16
+ .seh_stackalloc 16
+ add sp, sp, #48
+ .seh_stackalloc 48
+ .seh_endepilogue
+ ret
+ .seh_endproc
+
+ .seh_proc nonpacked3
+nonpacked3:
+ sub sp, sp, #48
+ .seh_stackalloc 48
+ sub sp, sp, #32
+ .seh_stackalloc 32
+ .seh_endprologue
+
+ nop
+ .seh_startepilogue
+ // Not packed; the epilogue is longer than the prologue.
+ mov sp, x29
+ .seh_set_fp
+ add sp, sp, #32
+ .seh_stackalloc 32
+ add sp, sp, #48
+ .seh_stackalloc 48
+ .seh_endepilogue
+ ret
+ .seh_endproc
diff --git a/llvm/test/MC/AArch64/seh.s b/llvm/test/MC/AArch64/seh.s
index 4e235d032d68..0da956cbf2f5 100644
--- a/llvm/test/MC/AArch64/seh.s
+++ b/llvm/test/MC/AArch64/seh.s
@@ -20,7 +20,7 @@
// CHECK-NEXT: }
// CHECK: Section {
// CHECK: Name: .xdata
-// CHECK: RawDataSize: 56
+// CHECK: RawDataSize: 52
// CHECK: RelocationCount: 1
// CHECK: Characteristics [
// CHECK-NEXT: ALIGN_4BYTES
@@ -41,7 +41,7 @@
// CHECK-NEXT: Relocations [
// CHECK-NEXT: Section (4) .xdata {
-// CHECK-NEXT: 0x2C IMAGE_REL_ARM64_ADDR32NB __C_specific_handler
+// CHECK-NEXT: 0x28 IMAGE_REL_ARM64_ADDR32NB __C_specific_handler
// CHECK-NEXT: }
// CHECK-NEXT: Section (5) .pdata {
// CHECK-NEXT: 0x0 IMAGE_REL_ARM64_ADDR32NB func
@@ -80,15 +80,9 @@
// CHECK-NEXT: 0x01 ; sub sp, #16
// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
-// CHECK-NEXT: EpilogueScopes [
-// CHECK-NEXT: EpilogueScope {
-// CHECK-NEXT: StartOffset: 23
-// CHECK-NEXT: EpilogueStartIndex: 33
-// CHECK-NEXT: Opcodes [
-// CHECK-NEXT: 0x01 ; add sp, #16
-// CHECK-NEXT: 0xe4 ; end
-// CHECK-NEXT: ]
-// CHECK-NEXT: }
+// CHECK-NEXT: Epilogue [
+// CHECK-NEXT: 0x01 ; add sp, #16
+// CHECK-NEXT: 0xe4 ; end
// CHECK-NEXT: ]
// CHECK-NEXT: ExceptionHandler [
// CHECK-NEXT: Routine: __C_specific_handler (0x0)
More information about the llvm-commits
mailing list