[llvm] r333015 - [MachineOutliner] Add "thunk" outlining for AArch64.

Eli Friedman via llvm-commits llvm-commits at lists.llvm.org
Tue May 22 12:11:06 PDT 2018


Author: efriedma
Date: Tue May 22 12:11:06 2018
New Revision: 333015

URL: http://llvm.org/viewvc/llvm-project?rev=333015&view=rev
Log:
[MachineOutliner] Add "thunk" outlining for AArch64.

When we're outlining a sequence that ends in a call, we can save up to
three instructions in the outlined function by turning the call into
a tail-call. I refer to this as thunk outlining because the resulting
outlined function looks like a thunk; suggestions welcome for a better
name.

In addition to making the outlined function shorter, thunk outlining
allows outlining calls which would otherwise be illegal to outline:
we don't need to save/restore LR around the final call, so the callee
sees the same stack layout as it did before outlining, and we don't
need to prove anything about the stack access patterns of the callee.

To make this work effectively, I also added
MachineOutlinerInstrType::LegalTerminator to the generic MachineOutliner
code; this allows treating an arbitrary instruction as a terminator in
the suffix tree, i.e. the instruction is safe to outline, but only as
the last instruction of an outlined sequence.

Differential Revision: https://reviews.llvm.org/D47173


Added:
    llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h
    llvm/trunk/lib/CodeGen/MachineOutliner.cpp
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir

Modified: llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h?rev=333015&r1=333014&r2=333015&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h Tue May 22 12:11:06 2018
@@ -1639,10 +1639,12 @@ public:
 
   /// Represents how an instruction should be mapped by the outliner.
   /// \p Legal instructions are those which are safe to outline.
+  /// \p LegalTerminator instructions are safe to outline, but only as the
+  /// last instruction in a sequence.
   /// \p Illegal instructions are those which cannot be outlined.
   /// \p Invisible instructions are instructions which can be outlined, but
   /// shouldn't actually impact the outlining result.
-  enum MachineOutlinerInstrType { Legal, Illegal, Invisible };
+  enum MachineOutlinerInstrType { Legal, LegalTerminator, Illegal, Invisible };
 
   /// Returns how or if \p MI should be outlined.
   virtual MachineOutlinerInstrType

Modified: llvm/trunk/lib/CodeGen/MachineOutliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineOutliner.cpp?rev=333015&r1=333014&r2=333015&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineOutliner.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineOutliner.cpp Tue May 22 12:11:06 2018
@@ -777,6 +777,13 @@ struct InstructionMapper {
         mapToLegalUnsigned(It);
         break;
 
+      case TargetInstrInfo::MachineOutlinerInstrType::LegalTerminator:
+        mapToLegalUnsigned(It);
+        InstrList.push_back(It);
+        UnsignedVec.push_back(IllegalInstrNumber);
+        IllegalInstrNumber--;
+        break;
+
       case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
         break;
       }

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=333015&r1=333014&r2=333015&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Tue May 22 12:11:06 2018
@@ -4901,10 +4901,26 @@ AArch64InstrInfo::getSerializableMachine
   /// * Frame construction overhead: 1 (RET)
   /// * Requires stack fixups? No
   ///
+  /// \p MachineOutlinerThunk implies that the function is being created from
+  /// a sequence of instructions ending in a call. The outlined function is
+  /// called with a BL instruction, and the outlined function tail-calls the
+  /// original call destination.
+  ///
+  /// That is,
+  ///
+  /// I1                                OUTLINED_FUNCTION:
+  /// I2 --> BL OUTLINED_FUNCTION       I1
+  /// BL f                              I2
+  ///                                   B f
+  /// * Call construction overhead: 1 (BL)
+  /// * Frame construction overhead: 0
+  /// * Requires stack fixups? No
+  ///
 enum MachineOutlinerClass {
   MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
   MachineOutlinerTailCall, /// Only emit a branch.
-  MachineOutlinerNoLRSave  /// Emit a call and return.
+  MachineOutlinerNoLRSave, /// Emit a call and return.
+  MachineOutlinerThunk,    /// Emit a call and tail-call.
 };
 
 enum MachineOutlinerMBBFlags {
@@ -4950,6 +4966,8 @@ AArch64InstrInfo::getOutlininingCandidat
       [this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
                  &I) { return canOutlineWithoutLRSave(I.second); };
 
+  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].second->getOpcode();
+
   // If the last instruction in any candidate is a terminator, then we should
   // tail call all of the candidates.
   if (RepeatedSequenceLocs[0].second->isTerminator()) {
@@ -4959,6 +4977,14 @@ AArch64InstrInfo::getOutlininingCandidat
     NumBytesToCreateFrame = 0;
   }
 
+  else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
+    // FIXME: Do we need to check if the code after this uses the value of LR?
+    CallID = MachineOutlinerThunk;
+    FrameID = MachineOutlinerThunk;
+    NumBytesForCall = 4;
+    NumBytesToCreateFrame = 0;
+  }
+
   else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
                        DoesntNeedLRSave)) {
     CallID = MachineOutlinerNoLRSave;
@@ -4977,8 +5003,9 @@ AArch64InstrInfo::getOutlininingCandidat
   // last instruction is a call. We don't want to save + restore in this case.
   // However, it could be possible that the last instruction is a call without
   // it being valid to tail call this sequence. We should consider this as well.
-  else if (RepeatedSequenceLocs[0].second->isCall() &&
-           FrameID != MachineOutlinerTailCall)
+  else if (FrameID != MachineOutlinerThunk &&
+           FrameID != MachineOutlinerTailCall &&
+           RepeatedSequenceLocs[0].second->isCall())
     NumBytesToCreateFrame += 8;
 
   return MachineOutlinerInfo(SequenceSize, NumBytesForCall,
@@ -5092,36 +5119,49 @@ AArch64InstrInfo::getOutliningType(Machi
   // stack. Thus, if we outline, say, half the parameters for a function call
   // plus the call, then we'll break the callee's expectations for the layout
   // of the stack.
+  //
+  // FIXME: Allow calls to functions which construct a stack frame, as long
+  // as they don't access arguments on the stack.
+  // FIXME: Figure out some way to analyze functions defined in other modules.
+  // We should be able to compute the memory usage based on the IR calling
+  // convention, even if we can't see the definition.
   if (MI.isCall()) {
     const Module *M = MF->getFunction().getParent();
     assert(M && "No module?");
 
     // Get the function associated with the call. Look at each operand and find
     // the one that represents the callee and get its name.
-    Function *Callee = nullptr;
+    const Function *Callee = nullptr;
     for (const MachineOperand &MOP : MI.operands()) {
-      if (MOP.isSymbol()) {
-        Callee = M->getFunction(MOP.getSymbolName());
-        break;
-      }
-
-      else if (MOP.isGlobal()) {
-        Callee = M->getFunction(MOP.getGlobal()->getGlobalIdentifier());
+      if (MOP.isGlobal()) {
+        Callee = dyn_cast<Function>(MOP.getGlobal());
         break;
       }
     }
 
-    // Only handle functions that we have information about.
-    if (!Callee)
+    // Never outline calls to mcount.  There isn't any rule that would require
+    // this, but the Linux kernel's "ftrace" feature depends on it.
+    if (Callee && Callee->getName() == "\01_mcount")
       return MachineOutlinerInstrType::Illegal;
 
+    // If we don't know anything about the callee, assume it depends on the
+    // stack layout of the caller. In that case, it's only legal to outline
+    // as a tail-call.  Whitelist the call instructions we know about so we
+    // don't get unexpected results with call pseudo-instructions.
+    auto UnknownCallOutlineType = MachineOutlinerInstrType::Illegal;
+    if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
+      UnknownCallOutlineType = MachineOutlinerInstrType::LegalTerminator;
+
+    if (!Callee)
+      return UnknownCallOutlineType;
+
     // We have a function we have information about. Check it if it's something
     // can safely outline.
     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
 
     // We don't know what's going on with the callee at all. Don't touch it.
     if (!CalleeMF)
-      return MachineOutlinerInstrType::Illegal;
+      return UnknownCallOutlineType;
 
     // Check if we know anything about the callee saves on the function. If we
     // don't, then don't touch it, since that implies that we haven't
@@ -5129,7 +5169,7 @@ AArch64InstrInfo::getOutliningType(Machi
     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
         MFI.getNumObjects() > 0)
-      return MachineOutlinerInstrType::Illegal;
+      return UnknownCallOutlineType;
 
     // At this point, we can say that CalleeMF ought to not pass anything on the
     // stack. Therefore, we can outline it.
@@ -5153,6 +5193,8 @@ AArch64InstrInfo::getOutliningType(Machi
     // * LR is available in the range (No save/restore around call)
     // * The range doesn't include calls (No save/restore in outlined frame)
     // are true.
+    // FIXME: This is very restrictive; the flags check the whole block,
+    // not just the bit we will try to outline.
     bool MightNeedStackFixUp =
         (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
                   MachineOutlinerMBBFlags::HasCalls));
@@ -5267,6 +5309,24 @@ void AArch64InstrInfo::fixupPostOutline(
 void AArch64InstrInfo::insertOutlinerEpilogue(
     MachineBasicBlock &MBB, MachineFunction &MF,
     const MachineOutlinerInfo &MInfo) const {
+  // For thunk outlining, rewrite the last instruction from a call to a
+  // tail-call.
+  if (MInfo.FrameConstructionID == MachineOutlinerThunk) {
+    MachineInstr *Call = &*--MBB.instr_end();
+    unsigned TailOpcode;
+    if (Call->getOpcode() == AArch64::BL) {
+      TailOpcode = AArch64::TCRETURNdi;
+    } else {
+      assert(Call->getOpcode() == AArch64::BLR);
+      TailOpcode = AArch64::TCRETURNri;
+    }
+    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
+                            .add(Call->getOperand(0))
+                            .addImm(0);
+    MBB.insert(MBB.end(), TC);
+    Call->eraseFromParent();
+  }
+
   // Is there a call in the outlined range?
   auto IsNonTailCall = [](MachineInstr &MI) {
     return MI.isCall() && !MI.isReturn();
@@ -5274,6 +5334,8 @@ void AArch64InstrInfo::insertOutlinerEpi
   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
     // Fix up the instructions in the range, since we're going to modify the
     // stack.
+    assert(MInfo.FrameConstructionID != MachineOutlinerDefault &&
+           "Can only fix up stack references once");
     fixupPostOutline(MBB);
 
     // LR has to be a live in so that we can save it.
@@ -5282,7 +5344,8 @@ void AArch64InstrInfo::insertOutlinerEpi
     MachineBasicBlock::iterator It = MBB.begin();
     MachineBasicBlock::iterator Et = MBB.end();
 
-    if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
+    if (MInfo.FrameConstructionID == MachineOutlinerTailCall ||
+        MInfo.FrameConstructionID == MachineOutlinerThunk)
       Et = std::prev(MBB.end());
 
     // Insert a save before the outlined region
@@ -5322,7 +5385,8 @@ void AArch64InstrInfo::insertOutlinerEpi
   }
 
   // If this is a tail call outlined function, then there's already a return.
-  if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
+  if (MInfo.FrameConstructionID == MachineOutlinerTailCall ||
+      MInfo.FrameConstructionID == MachineOutlinerThunk)
     return;
 
   // It's not a tail call, so we have to insert the return ourselves.
@@ -5357,7 +5421,8 @@ MachineBasicBlock::iterator AArch64Instr
   }
 
   // Are we saving the link register?
-  if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
+  if (MInfo.CallConstructionID == MachineOutlinerNoLRSave ||
+      MInfo.CallConstructionID == MachineOutlinerThunk) {
     // No, so just insert the call.
     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                             .addGlobalAddress(M.getNamedValue(MF.getName())));

Added: llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll?rev=333015&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll Tue May 22 12:11:06 2018
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -enable-machine-outliner -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-pc-linux-gnu"
+
+declare i32 @thunk_called_fn(i32, i32, i32, i32)
+
+define i32 @a() {
+; CHECK-LABEL: a:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    add w0, w0, #8 // =8
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+  %cx = add i32 %call, 8
+  ret i32 %cx
+}
+
+define i32 @b() {
+; CHECK-LABEL: b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    add w0, w0, #88 // =88
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+  %cx = add i32 %call, 88
+  ret i32 %cx
+}
+
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr     w0, wzr, #0x1
+; CHECK-NEXT:    orr     w1, wzr, #0x2
+; CHECK-NEXT:    orr     w2, wzr, #0x3
+; CHECK-NEXT:    orr     w3, wzr, #0x4
+; CHECK-NEXT:    b       thunk_called_fn

Modified: llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir?rev=333015&r1=333014&r2=333015&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir Tue May 22 12:11:06 2018
@@ -124,6 +124,8 @@ body:             |
     $w17 = ORRWri $wzr, 1
     $w17 = ORRWri $wzr, 1
     BL @baz, implicit-def dead $lr, implicit $sp
+    $w17 = ORRWri $wzr, 1
+    $w17 = ORRWri $wzr, 1
     $w17 = ORRWri $wzr, 2
     BL @baz, implicit-def dead $lr, implicit $sp
     $w17 = ORRWri $wzr, 1
@@ -131,6 +133,8 @@ body:             |
     $w17 = ORRWri $wzr, 1
     $w17 = ORRWri $wzr, 1
     BL @baz, implicit-def dead $lr, implicit $sp
+    $w17 = ORRWri $wzr, 1
+    $w17 = ORRWri $wzr, 1
     $w8 = ORRWri $wzr, 0
     
   bb.2:




More information about the llvm-commits mailing list