[llvm] d8e67c1 - [ARM] Add SEH opcodes in frame lowering

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 2 02:29:56 PDT 2022


Author: Martin Storsjö
Date: 2022-06-02T12:28:46+03:00
New Revision: d8e67c1cccd8fcb62230166caea744592288da17

URL: https://github.com/llvm/llvm-project/commit/d8e67c1cccd8fcb62230166caea744592288da17
DIFF: https://github.com/llvm/llvm-project/commit/d8e67c1cccd8fcb62230166caea744592288da17.diff

LOG: [ARM] Add SEH opcodes in frame lowering

Skip inserting regular CFI instructions if using WinCFI.

This is based a fair amount on the corresponding ARM64 implementation,
but instead of trying to insert the SEH opcodes one by one where
we generate other prolog/epilog instructions, we try to walk over the
whole prolog/epilog range and insert them. This is done because in
many cases, the exact number of instructions inserted is abstracted
away deeper.

For some cases, we manually insert specific SEH opcodes directly where
instructions are generated, where the automatic mapping of instructions
to SEH opcodes doesn't hold up (e.g. for __chkstk stack probes).

Skip Thumb2SizeReduction for SEH prologs/epilogs, and force
tail calls to wide instructions (just like on MachO), to make sure
that the unwind info actually matches the width of the final
instructions, without heuristics about what later passes will do.

Mark SEH instructions as scheduling boundaries, to make sure that they
aren't reordered away from the instruction they describe by
PostRAScheduler.

Mark the SEH instructions with the NoMerge flag, to avoid doing
tail merging of functions that have multiple epilogs that all end
with the same sequence of "b <other>; .seh_nop_w, .seh_endepilogue".

Differential Revision: https://reviews.llvm.org/D125648

Added: 
    llvm/test/CodeGen/ARM/Windows/wineh-opcodes.ll
    llvm/test/CodeGen/ARM/Windows/wineh-tailmerge.ll

Modified: 
    llvm/lib/Target/ARM/ARMAsmPrinter.cpp
    llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/lib/Target/ARM/ARMBaseInstrInfo.h
    llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
    llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
    llvm/lib/Target/ARM/ARMFrameLowering.cpp
    llvm/lib/Target/ARM/ARMInstrInfo.td
    llvm/lib/Target/ARM/ARMInstrThumb2.td
    llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
    llvm/test/CodeGen/ARM/Windows/dllimport.ll
    llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index e7b1aeb6825c1..f699a5b934c2f 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -2274,6 +2274,47 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
     EmitToStreamer(*OutStreamer, TmpInstSB);
     return;
   }
+
+  case ARM::SEH_StackAlloc:
+    ATS.emitARMWinCFIAllocStack(MI->getOperand(0).getImm(),
+                                MI->getOperand(1).getImm());
+    return;
+
+  case ARM::SEH_SaveRegs:
+  case ARM::SEH_SaveRegs_Ret:
+    ATS.emitARMWinCFISaveRegMask(MI->getOperand(0).getImm(),
+                                 MI->getOperand(1).getImm());
+    return;
+
+  case ARM::SEH_SaveSP:
+    ATS.emitARMWinCFISaveSP(MI->getOperand(0).getImm());
+    return;
+
+  case ARM::SEH_SaveFRegs:
+    ATS.emitARMWinCFISaveFRegs(MI->getOperand(0).getImm(),
+                               MI->getOperand(1).getImm());
+    return;
+
+  case ARM::SEH_SaveLR:
+    ATS.emitARMWinCFISaveLR(MI->getOperand(0).getImm());
+    return;
+
+  case ARM::SEH_Nop:
+  case ARM::SEH_Nop_Ret:
+    ATS.emitARMWinCFINop(MI->getOperand(0).getImm());
+    return;
+
+  case ARM::SEH_PrologEnd:
+    ATS.emitARMWinCFIPrologEnd(/*Fragment=*/false);
+    return;
+
+  case ARM::SEH_EpilogStart:
+    ATS.emitARMWinCFIEpilogStart(ARMCC::AL);
+    return;
+
+  case ARM::SEH_EpilogEnd:
+    ATS.emitARMWinCFIEpilogEnd();
+    return;
   }
 
   MCInst TmpInst;

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7fa253a200cb7..44dfd00d49635 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2071,6 +2071,9 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
   if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
     return true;
 
+  if (isSEHInstruction(MI))
+    return true;
+
   // Treat the start of the IT block as a scheduling boundary, but schedule
   // t2IT along with all instructions following it.
   // FIXME: This is a big hammer. But the alternative is to add all potential

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 40acb27d1eb1f..3b8f3403e3c36 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -757,6 +757,26 @@ static inline bool isValidCoprocessorNumber(unsigned Num,
   return true;
 }
 
+static inline bool isSEHInstruction(const MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case ARM::SEH_StackAlloc:
+  case ARM::SEH_SaveRegs:
+  case ARM::SEH_SaveRegs_Ret:
+  case ARM::SEH_SaveSP:
+  case ARM::SEH_SaveFRegs:
+  case ARM::SEH_SaveLR:
+  case ARM::SEH_Nop:
+  case ARM::SEH_Nop_Ret:
+  case ARM::SEH_PrologEnd:
+  case ARM::SEH_EpilogStart:
+  case ARM::SEH_EpilogEnd:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// getInstrPredicate - If instruction is predicated, returns its predicate
 /// condition, otherwise returns AL. It also returns the condition code
 /// register by reference.

diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 012c57afacb39..7ee9bd56452ee 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -214,6 +214,8 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
                             unsigned DefSubReg,
                             const TargetRegisterClass *SrcRC,
                             unsigned SrcSubReg) const override;
+
+  int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
 };
 
 } // end namespace llvm

diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 61fe27095e4f6..613904f702f0c 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -2107,6 +2108,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     case ARM::TCRETURNdi:
     case ARM::TCRETURNri: {
       MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+      if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
+        MBBI--;
+      if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
+        MBBI--;
       assert(MBBI->isReturn() &&
              "Can only insert epilog into returning blocks");
       unsigned RetOpcode = MBBI->getOpcode();
@@ -2116,13 +2121,21 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
 
       // Tail call return: adjust the stack pointer and jump to callee.
       MBBI = MBB.getLastNonDebugInstr();
+      if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
+        MBBI--;
+      if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
+        MBBI--;
       MachineOperand &JumpTarget = MBBI->getOperand(0);
 
       // Jump to label or value in register.
       if (RetOpcode == ARM::TCRETURNdi) {
+        MachineFunction *MF = MBB.getParent();
+        bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+                           MF->getFunction().needsUnwindTableEntry();
         unsigned TCOpcode =
             STI->isThumb()
-                ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+                ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd
+                                                         : ARM::tTAILJMPdND)
                 : ARM::TAILJMPd;
         MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
         if (JumpTarget.isGlobal())

diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index f3d95f057e275..71801f420217a 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -138,6 +138,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -272,6 +273,187 @@ static int getArgumentStackToRestore(MachineFunction &MF,
   return ArgumentPopSize;
 }
 
+static bool needsWinCFI(const MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+         F.needsUnwindTableEntry();
+}
+
+// Given a load or a store instruction, generate an appropriate unwinding SEH
+// code on Windows.
+static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
+                                             const TargetInstrInfo &TII,
+                                             unsigned Flags) {
+  unsigned Opc = MBBI->getOpcode();
+  MachineBasicBlock *MBB = MBBI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  DebugLoc DL = MBBI->getDebugLoc();
+  MachineInstrBuilder MIB;
+  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
+  const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+  Flags |= MachineInstr::NoMerge;
+
+  switch (Opc) {
+  default:
+    report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
+    break;
+  case ARM::t2ADDri:   // add.w r11, sp, #xx
+  case ARM::t2ADDri12: // add.w r11, sp, #xx
+  case ARM::t2SUBri:   // sub.w r4, r11, #xx
+  case ARM::t2MOVTi16: // movt  r4, #xx
+  case ARM::t2MOVi16:  // movw  r4, #xx
+  case ARM::tBL:       // bl __chkstk
+    // These are harmless if used for just setting up a frame pointer,
+    // but that frame pointer can't be relied upon for unwinding, unless
+    // set up with SEH_SaveSP.
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+              .addImm(/*Wide=*/1)
+              .setMIFlags(Flags);
+    break;
+
+  case ARM::tBLXr: // blx r12 (__chkstk)
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+              .addImm(/*Wide=*/0)
+              .setMIFlags(Flags);
+    break;
+
+  case ARM::t2MOVi32imm: // movw+movt
+    // This pseudo instruction expands into two mov instructions. If the
+    // second operand is a symbol reference, this will stay as two wide
+    // instructions, movw+movt. If they're immediates, the first one can
+    // end up as a narrow mov though.
+    // As two SEH instructions are appended here, they won't get interleaved
+    // between the two final movw/movt instructions, but it doesn't make any
+    // practical difference.
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+              .addImm(/*Wide=*/1)
+              .setMIFlags(Flags);
+    MBB->insertAfter(MBBI, MIB);
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+              .addImm(/*Wide=*/1)
+              .setMIFlags(Flags);
+    break;
+
+  case ARM::t2LDMIA_RET:
+  case ARM::t2LDMIA_UPD:
+  case ARM::t2STMDB_UPD: {
+    unsigned Mask = 0;
+    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
+      const MachineOperand &MO = MBBI->getOperand(i);
+      if (!MO.isReg() || MO.isImplicit())
+        continue;
+      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
+      if (Reg == 15)
+        Reg = 14;
+      Mask |= 1 << Reg;
+    }
+    unsigned SEHOpc =
+        (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
+    MIB = BuildMI(MF, DL, TII.get(SEHOpc))
+              .addImm(Mask)
+              .addImm(/*Wide=*/1)
+              .setMIFlags(Flags);
+    break;
+  }
+  case ARM::VSTMDDB_UPD:
+  case ARM::VLDMDIA_UPD: {
+    int First = -1, Last = 0;
+    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
+      const MachineOperand &MO = MBBI->getOperand(i);
+      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
+      if (First == -1)
+        First = Reg;
+      Last = Reg;
+    }
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
+              .addImm(First)
+              .addImm(Last)
+              .setMIFlags(Flags);
+    break;
+  }
+  case ARM::tSUBspi:
+  case ARM::tADDspi:
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
+              .addImm(MBBI->getOperand(2).getImm() * 4)
+              .addImm(/*Wide=*/0)
+              .setMIFlags(Flags);
+    break;
+  case ARM::t2SUBspImm:
+  case ARM::t2SUBspImm12:
+  case ARM::t2ADDspImm:
+  case ARM::t2ADDspImm12:
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
+              .addImm(MBBI->getOperand(2).getImm())
+              .addImm(/*Wide=*/1)
+              .setMIFlags(Flags);
+    break;
+
+  case ARM::tMOVr:
+    if (MBBI->getOperand(1).getReg() == ARM::SP &&
+        (Flags & MachineInstr::FrameSetup)) {
+      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
+                .addImm(Reg)
+                .setMIFlags(Flags);
+    } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
+               (Flags & MachineInstr::FrameDestroy)) {
+      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
+                .addImm(Reg)
+                .setMIFlags(Flags);
+    } else {
+      report_fatal_error("No SEH Opcode for MOV");
+    }
+    break;
+
+  case ARM::tBX_RET:
+  case ARM::TCRETURNri:
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
+              .addImm(/*Wide=*/0)
+              .setMIFlags(Flags);
+    break;
+
+  case ARM::TCRETURNdi:
+    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
+              .addImm(/*Wide=*/1)
+              .setMIFlags(Flags);
+    break;
+  }
+  return MBB->insertAfter(MBBI, MIB);
+}
+
+static MachineBasicBlock::iterator
+initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
+  if (MBBI == MBB.begin())
+    return MachineBasicBlock::iterator();
+  return std::prev(MBBI);
+}
+
+static void insertSEHRange(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator Start,
+                           const MachineBasicBlock::iterator &End,
+                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+  if (Start.isValid())
+    Start = std::next(Start);
+  else
+    Start = MBB.begin();
+
+  for (auto MI = Start; MI != End;) {
+    auto Next = std::next(MI);
+    // Check if this instruction already has got a SEH opcode added. In that
+    // case, don't do this generic mapping.
+    if (Next != End && isSEHInstruction(*Next)) {
+      MI = std::next(Next);
+      while (MI != End && isSEHInstruction(*MI))
+        ++MI;
+      continue;
+    }
+    insertSEH(MI, TII, MIFlags);
+    MI = Next;
+  }
+}
+
 static void emitRegPlusImmediate(
     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
     const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
@@ -481,6 +663,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned NumBytes = MFI.getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   int FPCXTSaveSize = 0;
+  bool NeedsWinCFI = needsWinCFI(MF);
 
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -509,7 +692,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                    MachineInstr::FrameSetup);
       DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
     }
-    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+    if (!NeedsWinCFI)
+      DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+    if (NeedsWinCFI && MBBI != MBB.begin()) {
+      insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
+          .setMIFlag(MachineInstr::FrameSetup);
+      MF.setHasWinCFI(true);
+    }
     return;
   }
 
@@ -646,15 +836,25 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
     uint32_t NumWords = NumBytes >> 2;
 
-    if (NumWords < 65536)
+    if (NumWords < 65536) {
       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
           .addImm(NumWords)
           .setMIFlags(MachineInstr::FrameSetup)
           .add(predOps(ARMCC::AL));
-    else
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
-        .addImm(NumWords)
-        .setMIFlags(MachineInstr::FrameSetup);
+    } else {
+      // Split into two instructions here, instead of using t2MOVi32imm,
+      // to allow inserting accurate SEH instructions (including accurate
+      // instruction size for each of them).
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+          .addImm(NumWords & 0xffff)
+          .setMIFlags(MachineInstr::FrameSetup)
+          .add(predOps(ARMCC::AL));
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
+          .addReg(ARM::R4)
+          .addImm(NumWords >> 16)
+          .setMIFlags(MachineInstr::FrameSetup)
+          .add(predOps(ARMCC::AL));
+    }
 
     switch (TM.getCodeModel()) {
     case CodeModel::Tiny:
@@ -681,12 +881,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       break;
     }
 
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
-        .addReg(ARM::SP, RegState::Kill)
-        .addReg(ARM::R4, RegState::Kill)
-        .setMIFlags(MachineInstr::FrameSetup)
-        .add(predOps(ARMCC::AL))
-        .add(condCodeOp());
+    MachineInstrBuilder Instr, SEH;
+    Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
+                .addReg(ARM::SP, RegState::Kill)
+                .addReg(ARM::R4, RegState::Kill)
+                .setMIFlags(MachineInstr::FrameSetup)
+                .add(predOps(ARMCC::AL))
+                .add(condCodeOp());
+    if (NeedsWinCFI) {
+      SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
+                .addImm(NumBytes)
+                .addImm(/*Wide=*/1)
+                .setMIFlags(MachineInstr::FrameSetup);
+      MBB.insertAfter(Instr, SEH);
+    }
     NumBytes = 0;
   }
 
@@ -726,27 +934,38 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                          dl, TII, FramePtr, ARM::SP,
                          PushSize + FramePtrOffsetInPush,
                          MachineInstr::FrameSetup);
-    if (FramePtrOffsetInPush + PushSize != 0) {
-      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
-          nullptr, MRI->getDwarfRegNum(FramePtr, true),
-          FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
-      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameSetup);
-    } else {
-      unsigned CFIIndex =
-          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
-              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
-      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameSetup);
+    if (!NeedsWinCFI) {
+      if (FramePtrOffsetInPush + PushSize != 0) {
+        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+            nullptr, MRI->getDwarfRegNum(FramePtr, true),
+            FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
+        BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex)
+            .setMIFlags(MachineInstr::FrameSetup);
+      } else {
+        unsigned CFIIndex =
+            MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+                nullptr, MRI->getDwarfRegNum(FramePtr, true)));
+        BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex)
+            .setMIFlags(MachineInstr::FrameSetup);
+      }
     }
   }
 
+  // Emit a SEH opcode indicating the prologue end. The rest of the prologue
+  // instructions below don't need to be replayed to unwind the stack.
+  if (NeedsWinCFI && MBBI != MBB.begin()) {
+    insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
+        .setMIFlag(MachineInstr::FrameSetup);
+    MF.setHasWinCFI(true);
+  }
+
   // Now that the prologue's actual instructions are finalised, we can insert
   // the necessary DWARF cf instructions to describe the situation. Start by
   // recording where each register ended up:
-  if (GPRCS1Size > 0) {
+  if (GPRCS1Size > 0 && !NeedsWinCFI) {
     MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
     int CFIIndex;
     for (const auto &Entry : CSI) {
@@ -780,7 +999,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     }
   }
 
-  if (GPRCS2Size > 0) {
+  if (GPRCS2Size > 0 && !NeedsWinCFI) {
     MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
     for (const auto &Entry : CSI) {
       Register Reg = Entry.getReg();
@@ -806,7 +1025,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     }
   }
 
-  if (DPRCSSize > 0) {
+  if (DPRCSSize > 0 && !NeedsWinCFI) {
     // Since vpush register list cannot have gaps, there may be multiple vpush
     // instructions in the prologue.
     MachineBasicBlock::iterator Pos = std::next(LastPush);
@@ -830,7 +1049,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // throughout the process. If we have a frame pointer, it takes over the job
   // half-way through, so only the first few .cfi_def_cfa_offset instructions
   // actually get emitted.
-  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+  if (!NeedsWinCFI)
+    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
 
   if (STI.isTargetELF() && hasFP(MF))
     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
@@ -927,7 +1147,14 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
 
+  MachineBasicBlock::iterator RangeStart;
   if (!AFI->hasStackFrame()) {
+    if (MF.hasWinCFI()) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
+          .setMIFlag(MachineInstr::FrameDestroy);
+      RangeStart = initMBBRange(MBB, MBBI);
+    }
+
     if (NumBytes + IncomingArgStackToRestore != 0)
       emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                    NumBytes + IncomingArgStackToRestore,
@@ -943,6 +1170,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
         ++MBBI;
     }
 
+    if (MF.hasWinCFI()) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
+          .setMIFlag(MachineInstr::FrameDestroy);
+      RangeStart = initMBBRange(MBB, MBBI);
+    }
+
     // Move SP to start of FP callee save spill area.
     NumBytes -= (ReservedArgStack +
                  AFI->getFPCXTSaveAreaSize() +
@@ -1029,6 +1262,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
     if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
       BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
   }
+
+  if (MF.hasWinCFI()) {
+    insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
+    BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
+        .setMIFlag(MachineInstr::FrameDestroy);
+  }
 }
 
 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -2596,17 +2835,19 @@ void ARMFrameLowering::adjustForSegmentedStacks(
 
   // Emit the relevant DWARF information about the change in stack pointer as
   // well as where to find both r4 and r5 (the callee-save registers)
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
-  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-      nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
-  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-      nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
-  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
+  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
+    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
+    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
+    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
 
   // mov SR1, sp
   if (Thumb) {
@@ -2808,13 +3049,15 @@ void ARMFrameLowering::adjustForSegmentedStacks(
 
   // Emit the DWARF info about the change in stack as well as where to find the
   // previous link register
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
-  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
+    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
         nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
-  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
+    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
 
   // Call __morestack().
   if (Thumb) {
@@ -2870,9 +3113,11 @@ void ARMFrameLowering::adjustForSegmentedStacks(
   }
 
   // Update the CFA offset now that we've popped
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
-  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
+  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
 
   // Return from this function.
   BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
@@ -2894,20 +3139,22 @@ void ARMFrameLowering::adjustForSegmentedStacks(
   }
 
   // Update the CFA offset now that we've popped
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
-  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
-
-  // Tell debuggers that r4 and r5 are now the same as they were in the
-  // previous function, that they're the "Same Value".
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
-      nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
-  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
-  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
-      nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
-  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
+  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+
+    // Tell debuggers that r4 and r5 are now the same as they were in the
+    // previous function, that they're the "Same Value".
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
+        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
+    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
+        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
+    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
 
   // Organizing MBB lists
   PostStackMBB->addSuccessor(&PrologueMBB);

diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index e170087e5baec..88bb74d1fc54b 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6476,3 +6476,24 @@ def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary,
   let AsmString = "@ COMPILER BARRIER";
   let hasNoSchedulingInfo = 1;
 }
+
+//===----------------------------------------------------------------------===//
+// Instructions used for emitting unwind opcodes on Windows.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in {
+  def SEH_StackAlloc : PseudoInst<(outs), (ins i32imm:$size, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+  def SEH_SaveRegs : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+  let isTerminator = 1 in
+  def SEH_SaveRegs_Ret : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+  def SEH_SaveSP : PseudoInst<(outs), (ins i32imm:$reg), NoItinerary, []>, Sched<[]>;
+  def SEH_SaveFRegs : PseudoInst<(outs), (ins i32imm:$first, i32imm:$last), NoItinerary, []>, Sched<[]>;
+  let isTerminator = 1 in
+  def SEH_SaveLR : PseudoInst<(outs), (ins i32imm:$offst), NoItinerary, []>, Sched<[]>;
+  def SEH_Nop : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>;
+  let isTerminator = 1 in
+  def SEH_Nop_Ret : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>;
+  def SEH_PrologEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+  def SEH_EpilogStart : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+  let isTerminator = 1 in
+  def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+}

diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 0119a80505774..20d8a45aaf491 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -3951,6 +3951,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
 
 // Tail calls. The MachO version of thumb tail calls uses a t2 branch, so
 // it goes here.
+// Windows SEH unwinding also needs a strict t2 branch for tail calls.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
   // IOS version.
   let Uses = [SP] in
@@ -3958,7 +3959,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
                    (ins thumb_br_target:$dst, pred:$p),
                    4, IIC_Br, [],
                    (t2B thumb_br_target:$dst, pred:$p)>,
-                 Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>;
+                 Requires<[IsThumb2]>, Sched<[WriteBr]>;
 }
 
 // IT block

diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index cdbcc9527e156..da4e7bad870da 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
@@ -205,11 +206,11 @@ namespace {
                         bool IsSelfLoop);
 
     /// ReduceMI - Attempt to reduce MI, return true on success.
-    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
-                  bool LiveCPSR, bool IsSelfLoop);
+    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
+                  bool IsSelfLoop, bool SkipPrologueEpilogue);
 
     /// ReduceMBB - Reduce width of instructions in the specified basic block.
-    bool ReduceMBB(MachineBasicBlock &MBB);
+    bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);
 
     bool OptimizeSize;
     bool MinimizeSize;
@@ -1012,11 +1013,15 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
 }
 
 bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
-                                bool LiveCPSR, bool IsSelfLoop) {
+                                bool LiveCPSR, bool IsSelfLoop,
+                                bool SkipPrologueEpilogue) {
   unsigned Opcode = MI->getOpcode();
   DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
   if (OPI == ReduceOpcodeMap.end())
     return false;
+  if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
+                               MI->getFlag(MachineInstr::FrameDestroy)))
+    return false;
   const ReduceEntry &Entry = ReduceTable[OPI->second];
 
   // Don't attempt normal reductions on "special" cases for now.
@@ -1036,7 +1041,8 @@ bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
   return false;
 }
 
-bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
+                                 bool SkipPrologueEpilogue) {
   bool Modified = false;
 
   // Yes, CPSR could be livein.
@@ -1080,7 +1086,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
     // Does NextMII belong to the same bundle as MI?
     bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
 
-    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
+    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
       Modified = true;
       MachineBasicBlock::instr_iterator I = std::prev(NextMII);
       MI = &*I;
@@ -1147,8 +1153,10 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   // predecessors.
   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
   bool Modified = false;
+  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+                     MF.getFunction().needsUnwindTableEntry();
   for (MachineBasicBlock *MBB : RPOT)
-    Modified |= ReduceMBB(*MBB);
+    Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
   return Modified;
 }
 

diff  --git a/llvm/test/CodeGen/ARM/Windows/dllimport.ll b/llvm/test/CodeGen/ARM/Windows/dllimport.ll
index 6786be3322e37..e8ee982162d51 100644
--- a/llvm/test/CodeGen/ARM/Windows/dllimport.ll
+++ b/llvm/test/CodeGen/ARM/Windows/dllimport.ll
@@ -57,5 +57,5 @@ define arm_aapcs_vfpcc i32 @call_internal() {
 }
 
 ; CHECK-LABEL: call_internal
-; CHECK: b internal
+; CHECK: b.w internal
 

diff  --git a/llvm/test/CodeGen/ARM/Windows/wineh-opcodes.ll b/llvm/test/CodeGen/ARM/Windows/wineh-opcodes.ll
new file mode 100644
index 0000000000000..81808ffdf07e3
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/Windows/wineh-opcodes.ll
@@ -0,0 +1,313 @@
+;; Check that this produces the expected assembly output
+; RUN: llc -mtriple=thumbv7-windows -o - %s -verify-machineinstrs | FileCheck %s
+;; Also try to write an object file, which verifies that the SEH opcodes
+;; match the actual prologue/epilogue length.
+; RUN: llc -mtriple=thumbv7-windows -filetype=obj -o %t.obj %s -verify-machineinstrs
+
+; CHECK-LABEL: clobberR4Frame:
+; CHECK-NEXT: .seh_proc clobberR4Frame
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         push.w  {r4, r7, r11, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r4, r7, r11, lr}
+; CHECK-NEXT:         add.w   r11, sp, #8
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         .seh_endprologue
+; CHECK-NEXT:         bl      other
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         pop.w   {r4, r7, r11, pc}
+; CHECK-NEXT:         .seh_save_regs_w        {r4, r7, r11, lr}
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @clobberR4Frame() uwtable "frame-pointer"="all" {
+entry:
+  call arm_aapcs_vfpcc void @other()
+  call void asm sideeffect "", "~{r4}"()
+  ret void
+}
+
+; CHECK-LABEL: clobberR4NoFrame:
+; CHECK-NEXT: .seh_proc clobberR4NoFrame
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         push.w  {r4, lr}
+; CHECK-NEXT:         .seh_save_regs_w  {r4, lr}
+; CHECK-NEXT:         .seh_endprologue
+; CHECK-NEXT:         bl      other
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         pop.w   {r4, pc}
+; CHECK-NEXT:         .seh_save_regs_w  {r4, lr}
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @clobberR4NoFrame() uwtable "frame-pointer"="none" {
+entry:
+  call arm_aapcs_vfpcc void @other()
+  call void asm sideeffect "", "~{r4}"()
+  ret void
+}
+
+; CHECK-LABEL: clobberR4Tail:
+; CHECK-NEXT: .seh_proc clobberR4Tail
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         push.w  {r4, lr}
+; CHECK-NEXT:         .seh_save_regs_w  {r4, lr}
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         pop.w   {r4, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r4, lr}
+; CHECK-NEXT:         b.w     other
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @clobberR4Tail() uwtable "frame-pointer"="none" {
+entry:
+  call void asm sideeffect "", "~{r4}"()
+  tail call arm_aapcs_vfpcc void @other()
+  ret void
+}
+
+; CHECK-LABEL: clobberD8D10:
+; CHECK-NEXT: .seh_proc clobberD8D10
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         vpush   {d8, d9, d10}
+; CHECK-NEXT:         .seh_save_fregs {d8-d10}
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         vpop    {d8, d9, d10}
+; CHECK-NEXT:         .seh_save_fregs {d8-d10}
+; CHECK-NEXT:         b.w     other
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @clobberD8D10() uwtable "frame-pointer"="none" {
+entry:
+  call void asm sideeffect "", "~{d8},~{d9},~{d10}"()
+  tail call arm_aapcs_vfpcc void @other()
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @other()
+
+; CHECK-LABEL: vararg:
+; CHECK-NEXT: .seh_proc vararg
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         sub     sp, #12
+; CHECK-NEXT:         .seh_stackalloc 12
+; CHECK-NEXT:         push.w  {r11, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         sub     sp, #4
+; CHECK-NEXT:         .seh_stackalloc 4
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         add     sp, #4
+; CHECK-NEXT:         .seh_stackalloc 4
+; CHECK-NEXT:         pop.w   {r11, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         add     sp, #12
+; CHECK-NEXT:         .seh_stackalloc 12
+; CHECK-NEXT:         bx      lr
+; CHECK-NEXT:         .seh_nop
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @vararg(i32 noundef %a, ...) uwtable "frame-pointer"="none" {
+entry:
+  %ap = alloca ptr, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %ap)
+  call void @llvm.va_start(ptr nonnull %ap)
+  %0 = load ptr, ptr %ap
+  call arm_aapcs_vfpcc void @useva(ptr noundef %0)
+  call void @llvm.va_end(ptr nonnull %ap)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %ap)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+
+declare arm_aapcs_vfpcc void @useva(ptr noundef)
+
+; CHECK-LABEL: onlystack:
+; CHECK-NEXT: .seh_proc onlystack
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         sub     sp, #4
+; CHECK-NEXT:         .seh_stackalloc 4
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         add     sp, #4
+; CHECK-NEXT:         .seh_stackalloc 4
+; CHECK-NEXT:         bx      lr
+; CHECK-NEXT:         .seh_nop
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define dso_local arm_aapcs_vfpcc void @onlystack() uwtable "frame-pointer"="none" {
+entry:
+  %buf = alloca [4 x i8], align 1
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %buf)
+  call void asm sideeffect "", "r"(ptr nonnull %buf)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %buf)
+  ret void
+}
+
+; CHECK-LABEL: func50:
+; CHECK-NEXT: .seh_proc func50
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         push.w  {r11, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         sub     sp, #56
+; CHECK-NEXT:         .seh_stackalloc 56
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         add     sp, #56
+; CHECK-NEXT:         .seh_stackalloc 56
+; CHECK-NEXT:         pop.w   {r11, pc}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @func50() {
+entry:
+  %buf = alloca [50 x i8], align 1
+  call void @llvm.lifetime.start.p0(i64 50, ptr nonnull %buf)
+  call arm_aapcs_vfpcc void @useptr(ptr noundef nonnull %buf)
+  call void @llvm.lifetime.end.p0(i64 50, ptr nonnull %buf)
+  ret void
+}
+
+; CHECK-LABEL: func4000:
+; CHECK-NEXT: .seh_proc func4000
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         push.w  {r11, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         sub.w   sp, sp, #4000
+; CHECK-NEXT:         .seh_stackalloc_w       4000
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         add.w   sp, sp, #4000
+; CHECK-NEXT:         .seh_stackalloc_w       4000
+; CHECK-NEXT:         pop.w   {r11, pc}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @func4000() {
+entry:
+  %buf = alloca [4000 x i8], align 1
+  call void @llvm.lifetime.start.p0(i64 4000, ptr nonnull %buf)
+  call arm_aapcs_vfpcc void @useptr(ptr noundef nonnull %buf)
+  call void @llvm.lifetime.end.p0(i64 4000, ptr nonnull %buf)
+  ret void
+}
+
+; CHECK-LABEL: func5000:
+; CHECK-NEXT: .seh_proc func5000
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         push.w  {r4, r5, r6, lr}
+; CHECK-NEXT:         .seh_save_regs_w  {r4-r6, lr}
+; CHECK-NEXT:         movw    r4, #1250
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         bl      __chkstk
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         sub.w   sp, sp, r4
+; CHECK-NEXT:         .seh_stackalloc_w       5000
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         add.w   sp, sp, #4992
+; CHECK-NEXT:         .seh_stackalloc_w       4992
+; CHECK-NEXT:         add     sp, #8
+; CHECK-NEXT:         .seh_stackalloc 8
+; CHECK-NEXT:         pop.w   {r4, r5, r6, pc}
+; CHECK-NEXT:         .seh_save_regs_w  {r4-r6, lr}
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @func5000() {
+entry:
+  %buf = alloca [5000 x i8], align 1
+  call void @llvm.lifetime.start.p0(i64 5000, ptr nonnull %buf)
+  call arm_aapcs_vfpcc void @useptr(ptr noundef nonnull %buf)
+  call void @llvm.lifetime.end.p0(i64 5000, ptr nonnull %buf)
+  ret void
+}
+
+; CHECK-LABEL: func262144:
+; CHECK-NEXT: .seh_proc func262144
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         push.w  {r4, r5, r6, lr}
+; CHECK-NEXT:         .seh_save_regs_w  {r4-r6, lr}
+; CHECK-NEXT:         movw    r4, #0
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         movt    r4, #1
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         bl      __chkstk
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         sub.w   sp, sp, r4
+; CHECK-NEXT:         .seh_stackalloc_w       262144
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         add.w   sp, sp, #262144
+; CHECK-NEXT:         .seh_stackalloc_w       262144
+; CHECK-NEXT:         pop.w   {r4, r5, r6, pc}
+; CHECK-NEXT:         .seh_save_regs_w  {r4-r6, lr}
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @func262144() {
+entry:
+  %buf = alloca [262144 x i8], align 1
+  call void @llvm.lifetime.start.p0(i64 262144, ptr nonnull %buf)
+  call arm_aapcs_vfpcc void @useptr(ptr noundef nonnull %buf)
+  call void @llvm.lifetime.end.p0(i64 262144, ptr nonnull %buf)
+  ret void
+}
+
+; CHECK-LABEL: func270000:
+; CHECK-NEXT: .seh_proc func270000
+; CHECK-NEXT: @ %bb.0:                                @ %entry
+; CHECK-NEXT:         push.w  {r4, r5, r6, lr}
+; CHECK-NEXT:         .seh_save_regs_w  {r4-r6, lr}
+; CHECK-NEXT:         movw    r4, #1964
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         movt    r4, #1
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         bl      __chkstk
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         sub.w   sp, sp, r4
+; CHECK-NEXT:         .seh_stackalloc_w       270000
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         add.w   sp, sp, #268288
+; CHECK-NEXT:         .seh_stackalloc_w       268288
+; CHECK-NEXT:         add.w   sp, sp, #1712
+; CHECK-NEXT:         .seh_stackalloc_w       1712
+; CHECK-NEXT:         pop.w   {r4, r5, r6, pc}
+; CHECK-NEXT:         .seh_save_regs_w  {r4-r6, lr}
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+define arm_aapcs_vfpcc void @func270000() {
+entry:
+  %buf = alloca [270000 x i8], align 1
+  call void @llvm.lifetime.start.p0(i64 270000, ptr nonnull %buf)
+  call arm_aapcs_vfpcc void @useptr(ptr noundef nonnull %buf)
+  call void @llvm.lifetime.end.p0(i64 270000, ptr nonnull %buf)
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @useptr(ptr noundef)

diff  --git a/llvm/test/CodeGen/ARM/Windows/wineh-tailmerge.ll b/llvm/test/CodeGen/ARM/Windows/wineh-tailmerge.ll
new file mode 100644
index 0000000000000..9c21936264dd1
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/Windows/wineh-tailmerge.ll
@@ -0,0 +1,59 @@
+;; Check that epilogues aren't tail merged.
+
+;; Check that this produces the expected assembly output
+; RUN: llc -mtriple=thumbv7-windows -o - %s -verify-machineinstrs | FileCheck %s
+;; Also try to write an object file, which verifies that the SEH opcodes
+;; match the actual prologue/epilogue length.
+; RUN: llc -mtriple=thumbv7-windows -filetype=obj -o %t.obj %s -verify-machineinstrs
+
+; CHECK-LABEL: d:
+; CHECK: .seh_proc d
+
+; CHECK:              push.w  {r11, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         .seh_endprologue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         pop.w   {r11, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         b.w     b
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         .seh_endepilogue
+
+; CHECK:              .seh_startepilogue
+; CHECK-NEXT:         pop.w   {r11, lr}
+; CHECK-NEXT:         .seh_save_regs_w        {r11, lr}
+; CHECK-NEXT:         b.w     c
+; CHECK-NEXT:         .seh_nop_w
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         .seh_endproc
+
+@a = global i32 0, align 4
+
+define arm_aapcs_vfpcc void @d() optsize uwtable "frame-pointer"="none" {
+entry:
+  %0 = load i32, ptr @a, align 4
+  switch i32 %0, label %if.then1 [
+    i32 10, label %if.then
+    i32 0, label %if.end2
+  ]
+
+if.then:
+  tail call arm_aapcs_vfpcc void @b()
+  br label %return
+
+if.then1:
+  tail call arm_aapcs_vfpcc void @b()
+  br label %if.end2
+
+if.end2:
+  tail call arm_aapcs_vfpcc void @c()
+  br label %return
+
+return:
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @b(...)
+
+declare arm_aapcs_vfpcc void @c(...)

diff  --git a/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll b/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll
index 781a4c65abc90..029138411ed8b 100644
--- a/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll
+++ b/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll
@@ -44,9 +44,9 @@
 ; OBJ-NEXT:   OffsetInParent: 0
 ; OBJ-NEXT:   BasePointerOffset: 12
 ; OBJ-NEXT:   LocalVariableAddrRange {
-; OBJ-NEXT:     OffsetStart: .text+0x8
+; OBJ-NEXT:     OffsetStart: .text+0xA
 ; OBJ-NEXT:     ISectStart: 0x0
-; OBJ-NEXT:     Range: 0x1A
+; OBJ-NEXT:     Range: 0x1C
 ; OBJ-NEXT:   }
 ; OBJ-NEXT: }
 


        


More information about the llvm-commits mailing list