[llvm] 747e5cf - X86: support Swift Async context

Tim Northover via llvm-commits llvm-commits at lists.llvm.org
Mon May 17 03:56:24 PDT 2021


Author: Tim Northover
Date: 2021-05-17T11:56:16+01:00
New Revision: 747e5cfb9f5d944b47fe014925b0d5dc2fda74d7

URL: https://github.com/llvm/llvm-project/commit/747e5cfb9f5d944b47fe014925b0d5dc2fda74d7
DIFF: https://github.com/llvm/llvm-project/commit/747e5cfb9f5d944b47fe014925b0d5dc2fda74d7.diff

LOG: X86: support Swift Async context

This adds support to the X86 backend for the newly committed swiftasync
function parameter. If such a (pointer) parameter is present, it gets stored
into an augmented frame record (populated in IR, but generally carrying an
enhanced backtrace for coroutines that use lots of tail calls back and forth).
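
For illustration (not part of this commit), a frontend attaches the attribute
through the usual C++ API; this minimal sketch assumes the async context is
the function's first parameter:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    // Tag the first parameter of F as the Swift async context. On x86-64 the
    // backend then expects the value in %r14 and spills it into the extended
    // frame record described below.
    void markSwiftAsyncParam(llvm::Function &F) {
      F.addParamAttr(/*ArgNo=*/0, llvm::Attribute::SwiftAsync);
    }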

The context frame is identical to AArch64 (primarily so that unwinders etc.
don't get extra complexity). Specifically, the new frame record is [AsyncCtx,
%rbp, ReturnAddr], and its presence is signalled by bit 60 of the stored %rbp
being set to 1. %rbp still points at the saved frame pointer in memory for
backwards compatibility (only partial on x86, but OTOH the weird AsyncCtx
before the rest of the record is there because of x86).
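
As a hedged sketch (helper names assumed, not part of this patch), a
frame-walking tool could detect and strip the tag like so:

    #include <cstdint>

    // Extended x86-64 frame record, low to high: [AsyncCtx, saved %rbp,
    // ReturnAddr]; the saved %rbp carries the tag in bit 60.
    constexpr uint64_t SwiftAsyncTag = 1ULL << 60;

    bool hasAsyncContext(uint64_t SavedRBP) {
      return (SavedRBP & SwiftAsyncTag) != 0;
    }

    // Clear the tag before following the frame chain, mirroring the BTR the
    // epilogue performs before returning.
    uint64_t untaggedFramePointer(uint64_t SavedRBP) {
      return SavedRBP & ~SwiftAsyncTag;
    }

    // The context slot sits immediately below the saved %rbp.
    uint64_t asyncContextAddr(uint64_t FramePtr) {
      return untaggedFramePointer(FramePtr) - 8;
    }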

Added: 
    llvm/test/CodeGen/X86/swift-async-reg.ll
    llvm/test/CodeGen/X86/swift-async.ll

Modified: 
    llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
    llvm/lib/Target/X86/X86FastISel.cpp
    llvm/lib/Target/X86/X86FrameLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86MachineFunctionInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 628b8f5040d99..7ba90f6c1bea0 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1454,6 +1454,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
     unsigned StackAdjust = 0;
     unsigned StackSize = 0;
     unsigned NumDefCFAOffsets = 0;
+    int MinAbsOffset = std::numeric_limits<int>::max();
 
     for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
       const MCCFIInstruction &Inst = Instrs[i];
@@ -1482,6 +1483,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         memset(SavedRegs, 0, sizeof(SavedRegs));
         StackAdjust = 0;
         SavedRegIdx = 0;
+        MinAbsOffset = std::numeric_limits<int>::max();
         InstrOffset += MoveInstrSize;
         break;
       }
@@ -1525,6 +1527,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
         SavedRegs[SavedRegIdx++] = Reg;
         StackAdjust += OffsetSize;
+        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
         InstrOffset += PushInstrSize(Reg);
         break;
       }
@@ -1538,6 +1541,11 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         // Offset was too big for a compact unwind encoding.
         return CU::UNWIND_MODE_DWARF;
 
+      // We don't attempt to track a real StackAdjust, so if the saved registers
+      // aren't adjacent to rbp we can't cope.
+      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
+        return CU::UNWIND_MODE_DWARF;
+
       // Get the encoding of the saved registers when we have a frame pointer.
       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

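The MinAbsOffset bookkeeping above enforces that the innermost callee-saved
push sits directly below the saved %rbp (return address at CFA-8, %rbp at
CFA-16, so the first CSR must be at CFA-24). A hedged standalone restatement
of that predicate, with the x86-64 slot size assumed:

    // Hypothetical helper, not in the patch: true when the frame-based compact
    // unwind encoding is still usable. With an async context (plus alignment
    // slot) between %rbp and the CSRs, the innermost save lands at CFA-40
    // rather than CFA-24, and the encoder falls back to DWARF.
    bool csrsAdjacentToRBP(int SavedRegIdx, int MinAbsOffset,
                           int OffsetSize = 8) {
      return SavedRegIdx == 0 || MinAbsOffset == 3 * OffsetSize;
    }
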
diff  --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index cf8d5d6c5b773..8a6b9e75efe03 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3068,6 +3068,7 @@ bool X86FastISel::fastLowerArguments() {
         Arg.hasAttribute(Attribute::InReg) ||
         Arg.hasAttribute(Attribute::StructRet) ||
         Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftAsync) ||
         Arg.hasAttribute(Attribute::SwiftError) ||
         Arg.hasAttribute(Attribute::Nest))
       return false;

diff  --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 3c093661680a9..8ddcd52795dd7 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -409,7 +409,12 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
     return 0;
 
   PI = MBB.erase(PI);
-  if (PI != MBB.end() && PI->isCFIInstruction()) PI = MBB.erase(PI);
+  if (PI != MBB.end() && PI->isCFIInstruction()) {
+    auto CIs = MBB.getParent()->getFrameInstructions();
+    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
+    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset)
+      PI = MBB.erase(PI);
+  }
   if (!doMergeWithPrevious)
     MBBI = skipDebugInstructionsForward(PI, MBB.end());
 
@@ -1356,6 +1361,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       STI.getTargetLowering()->hasStackProbeSymbol(MF);
   unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
 
+  if (HasFP && X86FI->hasSwiftAsyncContext()) {
+    BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8),
+            MachineFramePtr)
+        .addUse(MachineFramePtr)
+        .addImm(60)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
   // Re-align the stack on 64-bit if the x86-interrupt calling convention is
   // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
   // stack alignment.
@@ -1470,11 +1483,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
 
     if (!IsWin64Prologue && !IsFunclet) {
       // Update EBP with the new base value.
-      BuildMI(MBB, MBBI, DL,
-              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
-              FramePtr)
-          .addReg(StackPtr)
-          .setMIFlag(MachineInstr::FrameSetup);
+      if (!X86FI->hasSwiftAsyncContext()) {
+        BuildMI(MBB, MBBI, DL,
+                TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
+                FramePtr)
+            .addReg(StackPtr)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        // Before we update the live frame pointer we have to ensure there's a
+        // valid (or null) asynchronous context in its slot just before FP in
+        // the frame record, so store it now.
+        const auto &Attrs = MF.getFunction().getAttributes();
+
+        if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
+          // We have an initial context in r14, store it just before the frame
+          // pointer.
+          BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+              .addReg(X86::R14)
+              .setMIFlag(MachineInstr::FrameSetup);
+        } else {
+          // No initial context, store null so that there's no pointer that
+          // could be misused.
+          BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
+              .addImm(0)
+              .setMIFlag(MachineInstr::FrameSetup);
+        }
+        BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
+            .addUse(X86::RSP)
+            .addImm(1)
+            .addUse(X86::NoRegister)
+            .addImm(8)
+            .addUse(X86::NoRegister)
+            .setMIFlag(MachineInstr::FrameSetup);
+        BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
+            .addUse(X86::RSP)
+            .addImm(8)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
 
       if (NeedsDwarfCFI) {
         // Mark effective beginning of when frame pointer becomes valid.
@@ -1979,10 +2024,26 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   // AfterPop is the position to insert .cfi_restore.
   MachineBasicBlock::iterator AfterPop = MBBI;
   if (HasFP) {
+    if (X86FI->hasSwiftAsyncContext()) {
+      // Discard the context.
+      int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
+      emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
+    }
     // Pop EBP.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
             MachineFramePtr)
         .setMIFlag(MachineInstr::FrameDestroy);
+
+    // We need to reset FP to its untagged state on return. Bit 60 is currently
+    // used to show the presence of an extended frame.
+    if (X86FI->hasSwiftAsyncContext()) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
+              MachineFramePtr)
+          .addUse(MachineFramePtr)
+          .addImm(60)
+          .setMIFlag(MachineInstr::FrameDestroy);
+    }
+
     if (NeedsDwarfCFI) {
       unsigned DwarfStackPtr =
           TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
@@ -2007,7 +2068,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
 
     if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
       if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
-          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)))
+          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
         break;
       FirstCSPop = PI;
     }
@@ -2039,6 +2102,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     uint64_t LEAAmount =
         IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
 
+    if (X86FI->hasSwiftAsyncContext())
+      LEAAmount -= 16;
+
     // There are only two legal forms of epilogue:
     // - add SEHAllocationSize, %rsp
     // - lea SEHAllocationSize(%FramePtr), %rsp
@@ -2367,6 +2433,14 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
     SpillSlotOffset -= SlotSize;
     MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
 
+    // The async context lives directly before the frame pointer, and we
+    // allocate a second slot to preserve stack alignment.
+    if (X86FI->hasSwiftAsyncContext()) {
+      SpillSlotOffset -= SlotSize;
+      MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+      SpillSlotOffset -= SlotSize;
+    }
+
     // Since emitPrologue and emitEpilogue will handle spilling and restoring of
     // the frame register, we can delete it from CSI list and not have to worry
     // about avoiding it later.
@@ -3267,7 +3341,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
   assert(MBB.getParent() && "Block is not attached to a function!");
   const MachineFunction &MF = *MBB.getParent();
-  return !TRI->hasStackRealignment(MF) || !MBB.isLiveIn(X86::EFLAGS);
+  if (!MBB.isLiveIn(X86::EFLAGS))
+    return true;
+
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
 }
 
 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
@@ -3280,6 +3358,12 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
   if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
     return false;
 
+  // Swift async context epilogue has a BTR instruction that clobbers parts of
+  // EFLAGS.
+  const MachineFunction &MF = *MBB.getParent();
+  if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
+    return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
+
   if (canUseLEAForSPInEpilogue(*MBB.getParent()))
     return true;
 

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 66df93319b6a7..4057e7817fcce 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3747,6 +3747,20 @@ SDValue X86TargetLowering::LowerFormalArguments(
   }
 
   for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
+    if (Ins[I].Flags.isSwiftAsync()) {
+      auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      if (Subtarget.is64Bit())
+        X86FI->setHasSwiftAsyncContext(true);
+      else {
+        int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
+        X86FI->setSwiftAsyncContextFrameIdx(FI);
+        SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
+                                  DAG.getFrameIndex(FI, MVT::i32),
+                                  MachinePointerInfo::getFixedStack(MF, FI));
+        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
+      }
+    }
+
     // Swift calling convention does not require we copy the sret argument
     // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
     if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
@@ -25856,7 +25870,27 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     }
     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   }
-
+  case Intrinsic::swift_async_context_addr: {
+    auto &MF = DAG.getMachineFunction();
+    auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+    if (Subtarget.is64Bit()) {
+      MF.getFrameInfo().setFrameAddressIsTaken(true);
+      X86FI->setHasSwiftAsyncContext(true);
+      return SDValue(
+          DAG.getMachineNode(
+              X86::SUB64ri8, dl, MVT::i64,
+              DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64),
+              DAG.getTargetConstant(8, dl, MVT::i32)),
+          0);
+    } else {
+      // 32-bit, so there is no special extended frame; create or reuse an
+      // existing stack slot.
+      if (!X86FI->getSwiftAsyncContextFrameIdx())
+        X86FI->setSwiftAsyncContextFrameIdx(
+            MF.getFrameInfo().CreateStackObject(4, Align(4), false));
+      return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
+    }
+  }
   case Intrinsic::x86_avx512_vp2intersect_q_512:
   case Intrinsic::x86_avx512_vp2intersect_q_256:
   case Intrinsic::x86_avx512_vp2intersect_q_128:

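For completeness, a hedged sketch (builder and module setup assumed) of
emitting a call to the intrinsic this hunk lowers:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // Emit llvm.swift.async.context.addr. On 64-bit targets the lowering
    // above materialises %rbp - 8; on 32-bit it returns a plain stack slot.
    llvm::Value *emitAsyncContextAddr(llvm::IRBuilder<> &B, llvm::Module &M) {
      llvm::Function *Decl = llvm::Intrinsic::getDeclaration(
          &M, llvm::Intrinsic::swift_async_context_addr);
      return B.CreateCall(Decl);
    }
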
diff  --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index ecb86bb9e8c1f..46d2e2a66fd62 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -108,6 +108,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// True if this function has any preallocated calls.
   bool HasPreallocatedCall = false;
 
+  /// Whether this function has an extended frame record [Ctx, RBP, Return
+  /// addr]. If so, bit 60 of the in-memory frame pointer will be 1 to enable
+  /// other tools to detect the extended record.
+  bool HasSwiftAsyncContext = false;
+
+  Optional<int> SwiftAsyncContextFrameIdx;
+
   ValueMap<const Value *, size_t> PreallocatedIds;
   SmallVector<size_t, 0> PreallocatedStackSizes;
   SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
@@ -197,6 +204,14 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   bool hasPreallocatedCall() const { return HasPreallocatedCall; }
   void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
 
+  bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
+  void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
+
+  Optional<int> getSwiftAsyncContextFrameIdx() const {
+    return SwiftAsyncContextFrameIdx;
+  }
+  void setSwiftAsyncContextFrameIdx(int v) { SwiftAsyncContextFrameIdx = v; }
+
   size_t getPreallocatedIdForCallSite(const Value *CS) {
     auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
     if (Insert.second) {

diff  --git a/llvm/test/CodeGen/X86/swift-async-reg.ll b/llvm/test/CodeGen/X86/swift-async-reg.ll
new file mode 100644
index 0000000000000..59b41cc19a51d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/swift-async-reg.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - -fast-isel | FileCheck %s
+
+define i8* @argument(i8* swiftasync %in) {
+; CHECK-LABEL: argument:
+; CHECK: movq %r14, %rax
+
+  ret i8* %in
+}
+
+define void @call(i8* %in) {
+; CHECK-LABEL: call:
+; CHECK: movq %rdi, %r14
+
+  call i8* @argument(i8* swiftasync %in)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/X86/swift-async.ll b/llvm/test/CodeGen/X86/swift-async.ll
new file mode 100644
index 0000000000000..9716fe7364896
--- /dev/null
+++ b/llvm/test/CodeGen/X86/swift-async.ll
@@ -0,0 +1,111 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=i686-apple-darwin %s -o - | FileCheck %s --check-prefix=CHECK-32
+
+
+define void @simple(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: simple:
+; CHECK: btsq    $60, %rbp
+; CHECK: pushq   %rbp
+; CHECK: pushq   %r14
+; CHECK: leaq    8(%rsp), %rbp
+; CHECK: pushq
+; [...]
+
+; CHECK: addq    $16, %rsp
+; CHECK: popq    %rbp
+; CHECK: btrq    $60, %rbp
+; CHECK: retq
+
+; CHECK-32-LABEL: simple:
+; CHECK-32: movl 8(%ebp), [[TMP:%.*]]
+; CHECK-32: movl [[TMP]], {{.*}}(%ebp)
+
+  ret void
+}
+
+define void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: more_csrs:
+; CHECK: btsq    $60, %rbp
+; CHECK: pushq   %rbp
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: pushq   %r14
+; CHECK: leaq    8(%rsp), %rbp
+; CHECK: subq    $8, %rsp
+; CHECK: pushq   %r15
+; CHECK: .cfi_offset %r15, -40
+
+; [...]
+
+; CHECK: popq    %r15
+; CHECK: addq    $16, %rsp
+; CHECK: popq    %rbp
+; CHECK: btrq    $60, %rbp
+; CHECK: retq
+  call void asm sideeffect "", "~{r15}"()
+  ret void
+}
+
+define void @locals(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: locals:
+; CHECK: btsq    $60, %rbp
+; CHECK: pushq   %rbp
+; CHECK: .cfi_def_cfa_offset 16
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: pushq   %r14
+; CHECK: leaq    8(%rsp), %rbp
+; CHECK: .cfi_def_cfa_register %rbp
+; CHECK: subq    $56, %rsp
+
+; CHECK: leaq    -48(%rbp), %rdi
+; CHECK: callq   _bar
+
+; CHECK: addq    $48, %rsp
+; CHECK: addq    $16, %rsp
+; CHECK: popq    %rbp
+; CHECK: btrq    $60, %rbp
+; CHECK: retq
+
+  %var = alloca i32, i32 10
+  call void @bar(i32* %var)
+  ret void
+}
+
+define void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"="all" {
+; CHECK-LABEL: use_input_context:
+; CHECK: movq    %r14, (%rdi)
+
+  store i8* %ctx, i8** %ptr
+  ret void
+}
+
+define i8** @context_in_func() "frame-pointer"="non-leaf" {
+; CHECK-LABEL: context_in_func:
+; CHECK: leaq    -8(%rbp), %rax
+
+; CHECK-32-LABEL: context_in_func
+; CHECK-32: movl %esp, %eax
+
+  %ptr = call i8** @llvm.swift.async.context.addr()
+  ret i8** %ptr
+}
+
+define void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointer"="non-leaf" {
+; CHECK-LABEL: write_frame_context:
+; CHECK: movq    %rbp, [[TMP:%.*]]
+; CHECK: subq    $8, [[TMP]]
+; CHECK: movq    %rdi, ([[TMP]])
+
+  %ptr = call i8** @llvm.swift.async.context.addr()
+  store i8* %newctx, i8** %ptr
+  ret void
+}
+
+define void @simple_fp_elim(i8* swiftasync %ctx) "frame-pointer"="non-leaf" {
+; CHECK-LABEL: simple_fp_elim:
+; CHECK-NOT: btsq
+
+  ret void
+}
+
+declare void @bar(i32*)
+declare i8** @llvm.swift.async.context.addr()

