[llvm] 298e9ca - [MC] [Win64EH] Check that the SEH unwind opcodes match the actual instructions

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 1 01:26:40 PDT 2022


Author: Martin Storsjö
Date: 2022-06-01T11:25:49+03:00
New Revision: 298e9cac9204b788dd6a18dba669b40acf2aaa3c

URL: https://github.com/llvm/llvm-project/commit/298e9cac9204b788dd6a18dba669b40acf2aaa3c
DIFF: https://github.com/llvm/llvm-project/commit/298e9cac9204b788dd6a18dba669b40acf2aaa3c.diff

LOG: [MC] [Win64EH] Check that the SEH unwind opcodes match the actual instructions

It's a fairly common issue that the generating code incorrectly marks
instructions as narrow or wide; check that the instruction lengths
add up to the expected value, and error out if it doesn't. This allows
catching code generation bugs.

Also check that prologs and epilogs are properly terminated, to
catch other code generation issues.

Differential Revision: https://reviews.llvm.org/D125647

Added: 
    llvm/test/MC/ARM/seh-checks.s
    llvm/test/MC/ARM/seh-checks2.s

Modified: 
    llvm/include/llvm/MC/MCWinEH.h
    llvm/lib/MC/MCWin64EH.cpp
    llvm/lib/MC/WinCOFFObjectWriter.cpp
    llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCWinEH.h b/llvm/include/llvm/MC/MCWinEH.h
index 233ac2c50fafb..10fc034b576e9 100644
--- a/llvm/include/llvm/MC/MCWinEH.h
+++ b/llvm/include/llvm/MC/MCWinEH.h
@@ -59,6 +59,7 @@ struct FrameInfo {
   struct Epilog {
     std::vector<Instruction> Instructions;
     unsigned Condition;
+    MCSymbol *End;
   };
   MapVector<MCSymbol *, Epilog> EpilogMap;
 

diff  --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index 67b3d12729427..7ce5d8cdafa0a 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -1197,7 +1197,8 @@ static uint32_t ARMCountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
   return Count;
 }
 
-static uint32_t ARMCountOfInstructionBytes(ArrayRef<WinEH::Instruction> Insns) {
+static uint32_t ARMCountOfInstructionBytes(ArrayRef<WinEH::Instruction> Insns,
+                                           bool *HasCustom = nullptr) {
   uint32_t Count = 0;
   for (const auto &I : Insns) {
     switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
@@ -1247,12 +1248,50 @@ static uint32_t ARMCountOfInstructionBytes(ArrayRef<WinEH::Instruction> Insns) {
       // We can't reason about what instructions this maps to; return a
       // phony number to make sure we don't accidentally do epilog packing.
       Count += 1000;
+      if (HasCustom)
+        *HasCustom = true;
       break;
     }
   }
   return Count;
 }
 
+static void checkARMInstructions(MCStreamer &Streamer,
+                                 ArrayRef<WinEH::Instruction> Insns,
+                                 const MCSymbol *Begin, const MCSymbol *End,
+                                 StringRef Name, StringRef Type) {
+  if (!End)
+    return;
+  Optional<int64_t> MaybeDistance =
+      GetOptionalAbsDifference(Streamer, End, Begin);
+  if (!MaybeDistance)
+    return;
+  uint32_t Distance = (uint32_t)*MaybeDistance;
+  bool HasCustom = false;
+  uint32_t InstructionBytes = ARMCountOfInstructionBytes(Insns, &HasCustom);
+  if (HasCustom)
+    return;
+  if (Distance != InstructionBytes) {
+    Streamer.getContext().reportError(
+        SMLoc(), "Incorrect size for " + Name + " " + Type + ": " +
+                     Twine(Distance) +
+                     " bytes of instructions in range, but .seh directives "
+                     "corresponding to " +
+                     Twine(InstructionBytes) + " bytes\n");
+  }
+}
+
+static bool isARMTerminator(const WinEH::Instruction &inst) {
+  switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
+  case Win64EH::UOP_End:
+  case Win64EH::UOP_EndNop:
+  case Win64EH::UOP_WideEndNop:
+    return true;
+  default:
+    return false;
+  }
+}
+
 // Unwind opcode encodings and restrictions are documented at
 // https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling
 static void ARMEmitUnwindCode(MCStreamer &streamer,
@@ -1960,6 +1999,27 @@ static void ARMEmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
   streamer.emitLabel(Label);
   info->Symbol = Label;
 
+  if (!info->PrologEnd)
+    streamer.getContext().reportError(SMLoc(), "Prologue in " +
+                                                   info->Function->getName() +
+                                                   " not correctly terminated");
+
+  if (info->PrologEnd && !info->Fragment)
+    checkARMInstructions(streamer, info->Instructions, info->Begin,
+                         info->PrologEnd, info->Function->getName(),
+                         "prologue");
+  for (auto &I : info->EpilogMap) {
+    MCSymbol *EpilogStart = I.first;
+    auto &Epilog = I.second;
+    checkARMInstructions(streamer, Epilog.Instructions, EpilogStart, Epilog.End,
+                         info->Function->getName(), "epilogue");
+    if (Epilog.Instructions.empty() ||
+        !isARMTerminator(Epilog.Instructions.back()))
+      streamer.getContext().reportError(
+          SMLoc(), "Epilogue in " + info->Function->getName() +
+                       " not correctly terminated");
+  }
+
   Optional<int64_t> RawFuncLength;
   const MCExpr *FuncLengthExpr = nullptr;
   if (!info->FuncletOrFuncEnd) {

diff  --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 1de1532ea38c5..33e496b7a8645 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -958,7 +958,7 @@ void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm,
   for (const auto &Section : Asm) {
     COFFSection *Sec = SectionMap[&Section];
 
-    if (Sec->Number == -1)
+    if (!Sec || Sec->Number == -1)
       continue;
 
     Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);

diff  --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 1498b0975a9a6..883a1d1cdec9c 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -11,6 +11,7 @@
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCWin64EH.h"
 #include "llvm/MC/MCWinCOFFStreamer.h"
@@ -237,6 +238,12 @@ void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogEnd() {
   if (!CurFrame)
     return;
 
+  if (!CurrentEpilog) {
+    S.getContext().reportError(SMLoc(), "Stray .seh_endepilogue in " +
+                                            CurFrame->Function->getName());
+    return;
+  }
+
   std::vector<WinEH::Instruction> &Epilog =
       CurFrame->EpilogMap[CurrentEpilog].Instructions;
 
@@ -255,6 +262,8 @@ void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogEnd() {
   InEpilogCFI = false;
   WinEH::Instruction Inst = WinEH::Instruction(UnwindCode, nullptr, -1, 0);
   CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst);
+  MCSymbol *Label = S.emitCFILabel();
+  CurFrame->EpilogMap[CurrentEpilog].End = Label;
   CurrentEpilog = nullptr;
 }
 

diff  --git a/llvm/test/MC/ARM/seh-checks.s b/llvm/test/MC/ARM/seh-checks.s
new file mode 100644
index 0000000000000..b4b9be432d783
--- /dev/null
+++ b/llvm/test/MC/ARM/seh-checks.s
@@ -0,0 +1,51 @@
+// This test checks error reporting for mismatched prolog/epilog lengths
+
+// RUN: not llvm-mc -triple thumbv7-pc-win32 -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s
+
+// CHECK-NOT: func1
+// CHECK: error: Incorrect size for func2 epilogue: 6 bytes of instructions in range, but .seh directives corresponding to 4 bytes
+// CHECK: error: Incorrect size for func3 prologue: 4 bytes of instructions in range, but .seh directives corresponding to 2 bytes
+
+        .text
+        .syntax unified
+
+        .seh_proc func1
+func1:
+        // Instruction with indeterminate length
+        b other
+        .seh_endprologue
+        nop
+        .seh_startepilogue
+        // The p2align causes the length of the epilogue to be unknown, so
+        // we can't report errors about the mismatch here.
+        .p2align 1
+        pop {r4-r7-lr}
+        .seh_save_regs {r4-r7,lr}
+        bx lr
+        .seh_nop
+        .seh_endepilogue
+        .seh_endproc
+
+        .seh_proc func2
+func2:
+        .seh_endprologue
+        nop
+        .seh_startepilogue
+        // As we're popping into lr instead of directly into pc, this pop
+        // becomes a wide instruction.
+        pop {r4-r7,lr}
+        // The directive things we're making a narrow instruction, which
+        // is wrong.
+        .seh_save_regs {r4-r7,lr}
+        bx lr
+        .seh_nop
+        .seh_endepilogue
+        .seh_endproc
+
+        .seh_proc func3
+func3:
+        nop.w
+        .seh_nop
+        .seh_endprologue
+        nop
+        .seh_endproc

diff  --git a/llvm/test/MC/ARM/seh-checks2.s b/llvm/test/MC/ARM/seh-checks2.s
new file mode 100644
index 0000000000000..c8c525649f975
--- /dev/null
+++ b/llvm/test/MC/ARM/seh-checks2.s
@@ -0,0 +1,74 @@
+// This test checks error reporting for missing ending/starting of prologues/epilogues
+
+// RUN: not llvm-mc -triple thumbv7-pc-win32 -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s
+
+// CHECK: error: Stray .seh_endepilogue in func1
+// CHECK: error: Prologue in func2 not correctly terminated
+// CHECK: error: Epilogue in func3 not correctly terminated
+// CHECK: error: Epilogue in func4 not correctly terminated
+
+        .text
+        .syntax unified
+
+        .seh_proc func1
+func1:
+        sub sp, #16
+        .seh_stackalloc 16
+        .seh_endprologue
+        nop
+        // Missing .seh_startepilogue
+        add sp, #16
+        .seh_stackalloc 16
+        bx lr
+        .seh_nop
+        .seh_endepilogue
+        .seh_endproc
+
+        .seh_proc func2
+func2:
+        sub sp, #16
+        .seh_stackalloc 16
+        // Missing .seh_endprologue
+        nop
+        .seh_startepilogue
+        add sp, #16
+        .seh_stackalloc 16
+        bx lr
+        .seh_nop
+        .seh_endepilogue
+        .seh_endproc
+
+        .seh_proc func3
+func3:
+        sub sp, #16
+        .seh_stackalloc 16
+        .seh_endprologue
+        nop
+        .seh_startepilogue
+        add sp, #16
+        .seh_stackalloc 16
+        bx lr
+        .seh_nop
+        // Missing .seh_endepilogue
+        .seh_endproc
+
+        .seh_proc func4
+func4:
+        sub sp, #16
+        .seh_stackalloc 16
+        .seh_endprologue
+        nop
+        .seh_startepilogue
+        add sp, #16
+        .seh_stackalloc 16
+        bx lr
+        .seh_nop
+        // Missing .seh_endepilogue
+        nop
+        .seh_startepilogue
+        add sp, #16
+        .seh_stackalloc 16
+        bx lr
+        .seh_nop
+        .seh_endepilogue
+        .seh_endproc


        


More information about the llvm-commits mailing list