[llvm] 700fbe5 - [MC] [Win64EH] Canonicalize ARM64 unwind opcodes

Fri Sep 11 00:34:31 PDT 2020

Author: Martin Storsjö
Date: 2020-09-11T10:31:04+03:00
New Revision: 700fbe591ac0f29c76e9f2bd77d752d4bd56d274

URL: https://github.com/llvm/llvm-project/commit/700fbe591ac0f29c76e9f2bd77d752d4bd56d274
DIFF: https://github.com/llvm/llvm-project/commit/700fbe591ac0f29c76e9f2bd77d752d4bd56d274.diff

LOG: [MC] [Win64EH] Canonicalize ARM64 unwind opcodes

Convert 2-byte opcodes to equivalent 1-byte ones.

Adjust the existing exhaustive testcase to avoid being altered by
the simplification rules (to keep that test exercising all individual
opcodes).

Fix the assembler parser limits for register pairs: for .seh_save_regp
and .seh_save_regp_x, allow up to x29, for an x29+x30 pair (which gets
remapped to the UOP_SaveFPLR(X) opcodes); for .seh_save_fregp and
.seh_save_fregp_x, allow up to d14, for a d14+d15 pair.

This does not create .seh_save_next for float register pairs, as the
actual unwinder implementation in current versions of Windows is buggy
for that case.

This gives a minimal but measurable size reduction. (For a 6.5 MB
DLL with 300 KB .xdata, the .xdata shrinks by 48 bytes. The opcode
sequences are padded to a 4 byte boundary, so very small improvements
might not end up mattering directly.)

Differential Revision: https://reviews.llvm.org/D87367

Added: 
    llvm/test/MC/AArch64/seh-optimize.s

Modified: 
    llvm/lib/MC/MCWin64EH.cpp
    llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
    llvm/test/MC/AArch64/seh.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index fb0de40fc6d5..e9ab88234ad3 100644

--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -544,6 +544,63 @@ FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
   return nullptr;
 }
 
+static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
+                            bool Reverse) {
+  unsigned PrevOffset = -1;   // sentinel: no preceding register-pair save
+  unsigned PrevRegister = -1; // sentinel: no preceding register-pair save
+
+  auto VisitInstruction = [&](WinEH::Instruction &Inst) {
+    // Convert 2-byte opcodes into equivalent 1-byte ones.
+    if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == 29) {
+      Inst.Operation = Win64EH::UOP_SaveFPLR;
+    } else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
+               Inst.Register == 29) {
+      Inst.Operation = Win64EH::UOP_SaveFPLRX;
+    } else if (Inst.Operation == Win64EH::UOP_SaveRegPX &&
+               Inst.Register == 19 && Inst.Offset <= 248) {
+      Inst.Operation = Win64EH::UOP_SaveR19R20X;
+    } else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) {
+      Inst.Operation = Win64EH::UOP_SetFP;
+    } else if (Inst.Operation == Win64EH::UOP_SaveRegP &&
+               Inst.Register == PrevRegister + 2 &&
+               Inst.Offset == PrevOffset + 16) {
+      Inst.Operation = Win64EH::UOP_SaveNext;
+      // Intentionally not creating UOP_SaveNext for float register pairs,
+      // as current versions of Windows (up to at least 20.04) are buggy
+      // regarding SaveNext for float pairs.
+    }
+    // Record the register and offset saved by this instruction, so that
+    // the next one can be checked for conversion into a UOP_SaveNext.
+    if (Inst.Operation == Win64EH::UOP_SaveR19R20X) {
+      PrevOffset = 0;
+      PrevRegister = 19;
+    } else if (Inst.Operation == Win64EH::UOP_SaveRegPX) {
+      PrevOffset = 0;
+      PrevRegister = Inst.Register;
+    } else if (Inst.Operation == Win64EH::UOP_SaveRegP) {
+      PrevOffset = Inst.Offset;
+      PrevRegister = Inst.Register;
+    } else if (Inst.Operation == Win64EH::UOP_SaveNext) {
+      PrevRegister += 2;
+      PrevOffset += 16;
+    } else {
+      PrevRegister = -1;
+      PrevOffset = -1;
+    }
+  };
+
+  // Visit the instructions in forward order for prologues and in
+  // backward order for epilogues (i.e. always reverse compared to how
+  // the opcodes are stored).
+  if (Reverse) {
+    for (auto It = Instructions.rbegin(); It != Instructions.rend(); It++)
+      VisitInstruction(*It);
+  } else {
+    for (WinEH::Instruction &Inst : Instructions)
+      VisitInstruction(Inst);
+  }
+}
+
 // Populate the .xdata section.  The format of .xdata on ARM64 is documented at
 // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
 static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@@ -572,6 +629,10 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
     return;
   }
 
+  simplifyOpcodes(info->Instructions, false);
+  for (auto &I : info->EpilogMap)
+    simplifyOpcodes(I.second, true);
+
   MCContext &context = streamer.getContext();
   MCSymbol *Label = context.createTempSymbol();
 

diff  --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 08a29bbb3e87..502966c63367 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -5725,7 +5725,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) {
 bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) {
   unsigned Reg;
   int64_t Offset;
-  if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
+  if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
       parseComma() || parseImmExpr(Offset))
     return true;
   getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset);
@@ -5737,7 +5737,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) {
 bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) {
   unsigned Reg;
   int64_t Offset;
-  if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::X28) ||
+  if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
       parseComma() || parseImmExpr(Offset))
     return true;
   getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset);
@@ -5789,7 +5789,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) {
 bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) {
   unsigned Reg;
   int64_t Offset;
-  if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
+  if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
       parseComma() || parseImmExpr(Offset))
     return true;
   getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset);
@@ -5801,7 +5801,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) {
 bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) {
   unsigned Reg;
   int64_t Offset;
-  if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
+  if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
       parseComma() || parseImmExpr(Offset))
     return true;
   getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset);

diff  --git a/llvm/test/MC/AArch64/seh-optimize.s b/llvm/test/MC/AArch64/seh-optimize.s
new file mode 100644
index 000000000000..0bf33af9cc75
--- /dev/null
+++ b/llvm/test/MC/AArch64/seh-optimize.s
@@ -0,0 +1,106 @@
+// This test checks that the unwinding opcodes are remapped to more
+// efficient ones where possible.
+
+// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s -o %t.o
+// RUN: llvm-readobj -u %t.o | FileCheck %s
+
+// CHECK:      UnwindInformation [
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     Function: func
+// CHECK-NEXT:     ExceptionRecord: .xdata
+// CHECK-NEXT:     ExceptionData {
+// CHECK:            Prologue [
+// CHECK-NEXT:         0xd882              ; stp d10, d11, [sp, #16]
+// CHECK-NEXT:         0xda07              ; stp d8, d9, [sp, #-64]!
+// CHECK-NEXT:         0xe6                ; save next
+// CHECK-NEXT:         0x28                ; stp x19, x20, [sp, #-64]!
+// CHECK-NEXT:         0xca49              ; stp x28, x29, [sp, #72]
+// CHECK-NEXT:         0xe6                ; save next
+// CHECK-NEXT:         0xe6                ; save next
+// CHECK-NEXT:         0xe6                ; save next
+// CHECK-NEXT:         0xcc47              ; stp x20, x21, [sp, #-64]!
+// CHECK-NEXT:         0x42                ; stp x29, x30, [sp, #16]
+// CHECK-NEXT:         0xca02              ; stp x27, x28, [sp, #16]
+// CHECK-NEXT:         0x83                ; stp x29, x30, [sp, #-32]!
+// CHECK-NEXT:         0xce03              ; stp x27, x28, [sp, #-32]!
+// CHECK-NEXT:         0xe1                ; mov fp, sp
+// CHECK-NEXT:         0xe201              ; add fp, sp, #8
+// CHECK-NEXT:         0xe4                ; end
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       EpilogueScopes [
+// CHECK-NEXT:         EpilogueScope {
+// CHECK:                Opcodes [
+// CHECK-NEXT:             0xc904              ; ldp x23, x24, [sp, #32]
+// CHECK-NEXT:             0xe6                ; restore next
+// CHECK-NEXT:             0xcc83              ; ldp x21, x22, [sp], #32
+// CHECK-NEXT:             0x24                ; ldp x19, x20, [sp], #32
+// CHECK-NEXT:             0xcc1f              ; ldp x19, x20, [sp], #256
+// CHECK-NEXT:             0xe4                ; end
+// CHECK-NEXT:           ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+
+    .text
+    .globl func
+    .seh_proc func
+func:
+    add x29, sp, #8
+    .seh_add_fp 8
+    add x29, sp, #0
+    .seh_add_fp 0
+
+    stp x27, x28, [sp, #-32]!
+    .seh_save_regp_x x27, 32
+    stp x29, x30, [sp, #-32]!
+    .seh_save_regp_x x29, 32
+
+    stp x27, x28, [sp, #16]
+    .seh_save_regp x27, 16
+    stp x29, x30, [sp, #16]
+    .seh_save_regp x29, 16
+
+    stp x20, x21, [sp, #-64]!
+    .seh_save_regp_x x20, 64
+    stp x22, x23, [sp, #16]
+    .seh_save_regp x22, 16
+    stp x24, x25, [sp, #32]
+    .seh_save_next
+    stp x26, x27, [sp, #48]
+    .seh_save_regp x26, 48
+    stp x28, x29, [sp, #72]
+    .seh_save_regp x28, 72
+
+    stp x19, x20, [sp, #-64]!
+    .seh_save_r19r20_x 64
+    stp x21, x22, [sp, #16]
+    .seh_save_regp x21, 16
+
+    stp d8,  d9,  [sp, #-64]!
+    .seh_save_fregp_x d8, 64
+    stp d10, d11, [sp, #16]
+    // This is intentionally not converted into a save_next, because the
+    // Windows unwinder is buggy for save_next on float register pairs.
+    .seh_save_fregp d10, 16
+
+    .seh_endprologue
+
+    nop
+
+    .seh_startepilogue
+    ldp x27, x28, [sp, #32]
+    .seh_save_regp x23, 32
+    ldp x23, x24, [sp, #16]
+    .seh_save_regp x23, 16
+    ldp x21, x22, [sp], #32
+    .seh_save_regp_x x21, 32
+    ldp x19, x20, [sp], #32
+    .seh_save_regp_x x19, 32
+    ldp x19, x20, [sp], #256
+    .seh_save_regp_x x19, 256
+    .seh_endepilogue
+    ret
+    .seh_endproc

diff  --git a/llvm/test/MC/AArch64/seh.s b/llvm/test/MC/AArch64/seh.s
index f7faa64b9309..4e235d032d68 100644
--- a/llvm/test/MC/AArch64/seh.s
+++ b/llvm/test/MC/AArch64/seh.s
@@ -64,8 +64,8 @@
 // CHECK-NEXT:         0xe202              ; add fp, sp, #16
 // CHECK-NEXT:         0xdd41              ; str d13, [sp, #8]
 // CHECK-NEXT:         0xde83              ; str d12, [sp, #-32]!
-// CHECK-NEXT:         0xd882              ; stp d10, d11, [sp, #16]
-// CHECK-NEXT:         0xda03              ; stp d8, d9, [sp, #-32]!
+// CHECK-NEXT:         0xd884              ; stp d10, d11, [sp, #32]
+// CHECK-NEXT:         0xda05              ; stp d8, d9, [sp, #-48]!
 // CHECK-NEXT:         0x83                ; stp x29, x30, [sp, #-32]!
 // CHECK-NEXT:         0x46                ; stp x29, x30, [sp, #48]
 // CHECK-NEXT:         0xd141              ; str x24, [sp, #8]
@@ -74,7 +74,7 @@
 // CHECK-NEXT:         0xc882              ; stp x21, x22, [sp, #16]
 // CHECK-NEXT:         0xd6c2              ; stp x25, lr, [sp, #16]
 // CHECK-NEXT:         0x24                ; stp x19, x20, [sp, #-32]!
-// CHECK-NEXT:         0xcc03              ; stp x19, x20, [sp, #-32]!
+// CHECK-NEXT:         0xcc83              ; stp x21, x22, [sp, #-32]!
 // CHECK-NEXT:         0x83                ; stp x29, x30, [sp, #-32]!
 // CHECK-NEXT:         0xe1                ; mov fp, sp
 // CHECK-NEXT:         0x01                ; sub sp, #16
@@ -113,8 +113,8 @@ func:
     .seh_set_fp
     stp x29, x30, [sp, #-32]!
     .seh_save_fplr_x 32
-    stp x19, x20, [sp, #-32]!
-    .seh_save_regp_x x19, 32
+    stp x21, x22, [sp, #-32]!
+    .seh_save_regp_x x21, 32
     stp x19, x20, [sp, #-32]!
     .seh_save_r19r20_x 32
     stp x25, x30, [sp, #16]
@@ -131,10 +131,10 @@ func:
     .seh_save_fplr 48
     stp x29, x30, [sp, #-32]!
     .seh_save_fplr_x 32
-    stp d8, d9, [sp, #-32]!
-    .seh_save_fregp_x d8, 32
-    stp d10, d11, [sp, #16]
-    .seh_save_fregp d10, 16
+    stp d8, d9, [sp, #-48]!
+    .seh_save_fregp_x d8, 48
+    stp d10, d11, [sp, #32]
+    .seh_save_fregp d10, 32
     str d12, [sp, #-32]!
     .seh_save_freg_x d12, 32
     str d13, [sp, #8]