[llvm] r333015 - [MachineOutliner] Add "thunk" outlining for AArch64.
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Tue May 22 12:11:06 PDT 2018
Author: efriedma
Date: Tue May 22 12:11:06 2018
New Revision: 333015
URL: http://llvm.org/viewvc/llvm-project?rev=333015&view=rev
Log:
[MachineOutliner] Add "thunk" outlining for AArch64.
When we're outlining a sequence that ends in a call, we can save up to
three instructions in the outlined function by turning the call into
a tail-call. I refer to this as thunk outlining because the resulting
outlined function looks like a thunk; suggestions welcome for a better
name.
In addition to making the outlined function shorter, thunk outlining
allows outlining calls which would otherwise be illegal to outline:
we don't need to save/restore LR, so we don't need to prove anything
about the stack access patterns of the callee.
To make this work effectively, I also added
MachineOutlinerInstrType::LegalTerminator to the generic MachineOutliner
code; this allows treating an arbitrary instruction as a terminator in
the suffix tree.
Differential Revision: https://reviews.llvm.org/D47173
Added:
llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll
Modified:
llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h
llvm/trunk/lib/CodeGen/MachineOutliner.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir
Modified: llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h?rev=333015&r1=333014&r2=333015&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h Tue May 22 12:11:06 2018
@@ -1639,10 +1639,12 @@ public:
/// Represents how an instruction should be mapped by the outliner.
/// \p Legal instructions are those which are safe to outline.
+ /// \p LegalTerminator instructions are safe to outline, but only as the
+ /// last instruction in a sequence.
/// \p Illegal instructions are those which cannot be outlined.
/// \p Invisible instructions are instructions which can be outlined, but
/// shouldn't actually impact the outlining result.
- enum MachineOutlinerInstrType { Legal, Illegal, Invisible };
+ enum MachineOutlinerInstrType { Legal, LegalTerminator, Illegal, Invisible };
/// Returns how or if \p MI should be outlined.
virtual MachineOutlinerInstrType
Modified: llvm/trunk/lib/CodeGen/MachineOutliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineOutliner.cpp?rev=333015&r1=333014&r2=333015&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineOutliner.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineOutliner.cpp Tue May 22 12:11:06 2018
@@ -777,6 +777,13 @@ struct InstructionMapper {
mapToLegalUnsigned(It);
break;
+ case TargetInstrInfo::MachineOutlinerInstrType::LegalTerminator:
+ mapToLegalUnsigned(It);
+ InstrList.push_back(It);
+ UnsignedVec.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+ break;
+
case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
break;
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=333015&r1=333014&r2=333015&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Tue May 22 12:11:06 2018
@@ -4901,10 +4901,26 @@ AArch64InstrInfo::getSerializableMachine
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
+ /// \p MachineOutlinerThunk implies that the function is being created from
+ /// a sequence of instructions ending in a call. The outlined function is
+ /// called with a BL instruction, and the outlined function tail-calls the
+ /// original call destination.
+ ///
+ /// That is,
+ ///
+ ///   I1                                OUTLINED_FUNCTION:
+ ///   I2   --> BL OUTLINED_FUNCTION     I1
+ ///   BL f                              I2
+ ///                                     B f
+ /// * Call construction overhead: 1 (BL)
+ /// * Frame construction overhead: 0
+ /// * Requires stack fixups? No
+ ///
enum MachineOutlinerClass {
MachineOutlinerDefault, /// Emit a save, restore, call, and return.
MachineOutlinerTailCall, /// Only emit a branch.
- MachineOutlinerNoLRSave /// Emit a call and return.
+ MachineOutlinerNoLRSave, /// Emit a call and return.
+ MachineOutlinerThunk, /// Emit a call and tail-call.
};
enum MachineOutlinerMBBFlags {
@@ -4950,6 +4966,8 @@ AArch64InstrInfo::getOutlininingCandidat
[this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
&I) { return canOutlineWithoutLRSave(I.second); };
+ unsigned LastInstrOpcode = RepeatedSequenceLocs[0].second->getOpcode();
+
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
if (RepeatedSequenceLocs[0].second->isTerminator()) {
@@ -4959,6 +4977,14 @@ AArch64InstrInfo::getOutlininingCandidat
NumBytesToCreateFrame = 0;
}
+ else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
+ // FIXME: Do we need to check if the code after this uses the value of LR?
+ CallID = MachineOutlinerThunk;
+ FrameID = MachineOutlinerThunk;
+ NumBytesForCall = 4;
+ NumBytesToCreateFrame = 0;
+ }
+
else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
DoesntNeedLRSave)) {
CallID = MachineOutlinerNoLRSave;
@@ -4977,8 +5003,9 @@ AArch64InstrInfo::getOutlininingCandidat
// last instruction is a call. We don't want to save + restore in this case.
// However, it could be possible that the last instruction is a call without
// it being valid to tail call this sequence. We should consider this as well.
- else if (RepeatedSequenceLocs[0].second->isCall() &&
- FrameID != MachineOutlinerTailCall)
+ else if (FrameID != MachineOutlinerThunk &&
+ FrameID != MachineOutlinerTailCall &&
+ RepeatedSequenceLocs[0].second->isCall())
NumBytesToCreateFrame += 8;
return MachineOutlinerInfo(SequenceSize, NumBytesForCall,
@@ -5092,36 +5119,49 @@ AArch64InstrInfo::getOutliningType(Machi
// stack. Thus, if we outline, say, half the parameters for a function call
// plus the call, then we'll break the callee's expectations for the layout
// of the stack.
+ //
+ // FIXME: Allow calls to functions which construct a stack frame, as long
+ // as they don't access arguments on the stack.
+ // FIXME: Figure out some way to analyze functions defined in other modules.
+ // We should be able to compute the memory usage based on the IR calling
+ // convention, even if we can't see the definition.
if (MI.isCall()) {
const Module *M = MF->getFunction().getParent();
assert(M && "No module?");
// Get the function associated with the call. Look at each operand and find
// the one that represents the callee and get its name.
- Function *Callee = nullptr;
+ const Function *Callee = nullptr;
for (const MachineOperand &MOP : MI.operands()) {
- if (MOP.isSymbol()) {
- Callee = M->getFunction(MOP.getSymbolName());
- break;
- }
-
- else if (MOP.isGlobal()) {
- Callee = M->getFunction(MOP.getGlobal()->getGlobalIdentifier());
+ if (MOP.isGlobal()) {
+ Callee = dyn_cast<Function>(MOP.getGlobal());
break;
}
}
- // Only handle functions that we have information about.
- if (!Callee)
+ // Never outline calls to mcount. There isn't any rule that would require
+ // this, but the Linux kernel's "ftrace" feature depends on it.
+ if (Callee && Callee->getName() == "\01_mcount")
return MachineOutlinerInstrType::Illegal;
+ // If we don't know anything about the callee, assume it depends on the
+ // stack layout of the caller. In that case, it's only legal to outline
+ // as a tail-call. Whitelist the call instructions we know about so we
+ // don't get unexpected results with call pseudo-instructions.
+ auto UnknownCallOutlineType = MachineOutlinerInstrType::Illegal;
+ if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
+ UnknownCallOutlineType = MachineOutlinerInstrType::LegalTerminator;
+
+ if (!Callee)
+ return UnknownCallOutlineType;
+
// We have a function we have information about. Check it if it's something
// can safely outline.
MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
// We don't know what's going on with the callee at all. Don't touch it.
if (!CalleeMF)
- return MachineOutlinerInstrType::Illegal;
+ return UnknownCallOutlineType;
// Check if we know anything about the callee saves on the function. If we
// don't, then don't touch it, since that implies that we haven't
@@ -5129,7 +5169,7 @@ AArch64InstrInfo::getOutliningType(Machi
MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
MFI.getNumObjects() > 0)
- return MachineOutlinerInstrType::Illegal;
+ return UnknownCallOutlineType;
// At this point, we can say that CalleeMF ought to not pass anything on the
// stack. Therefore, we can outline it.
@@ -5153,6 +5193,8 @@ AArch64InstrInfo::getOutliningType(Machi
// * LR is available in the range (No save/restore around call)
// * The range doesn't include calls (No save/restore in outlined frame)
// are true.
+ // FIXME: This is very restrictive; the flags check the whole block,
+ // not just the bit we will try to outline.
bool MightNeedStackFixUp =
(Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
MachineOutlinerMBBFlags::HasCalls));
@@ -5267,6 +5309,24 @@ void AArch64InstrInfo::fixupPostOutline(
void AArch64InstrInfo::insertOutlinerEpilogue(
MachineBasicBlock &MBB, MachineFunction &MF,
const MachineOutlinerInfo &MInfo) const {
+ // For thunk outlining, rewrite the last instruction from a call to a
+ // tail-call.
+ if (MInfo.FrameConstructionID == MachineOutlinerThunk) {
+ MachineInstr *Call = &*--MBB.instr_end();
+ unsigned TailOpcode;
+ if (Call->getOpcode() == AArch64::BL) {
+ TailOpcode = AArch64::TCRETURNdi;
+ } else {
+ assert(Call->getOpcode() == AArch64::BLR);
+ TailOpcode = AArch64::TCRETURNri;
+ }
+ MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
+ .add(Call->getOperand(0))
+ .addImm(0);
+ MBB.insert(MBB.end(), TC);
+ Call->eraseFromParent();
+ }
+
// Is there a call in the outlined range?
auto IsNonTailCall = [](MachineInstr &MI) {
return MI.isCall() && !MI.isReturn();
@@ -5274,6 +5334,8 @@ void AArch64InstrInfo::insertOutlinerEpi
if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
// Fix up the instructions in the range, since we're going to modify the
// stack.
+ assert(MInfo.FrameConstructionID != MachineOutlinerDefault &&
+ "Can only fix up stack references once");
fixupPostOutline(MBB);
// LR has to be a live in so that we can save it.
@@ -5282,7 +5344,8 @@ void AArch64InstrInfo::insertOutlinerEpi
MachineBasicBlock::iterator It = MBB.begin();
MachineBasicBlock::iterator Et = MBB.end();
- if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
+ if (MInfo.FrameConstructionID == MachineOutlinerTailCall ||
+ MInfo.FrameConstructionID == MachineOutlinerThunk)
Et = std::prev(MBB.end());
// Insert a save before the outlined region
@@ -5322,7 +5385,8 @@ void AArch64InstrInfo::insertOutlinerEpi
}
// If this is a tail call outlined function, then there's already a return.
- if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
+ if (MInfo.FrameConstructionID == MachineOutlinerTailCall ||
+ MInfo.FrameConstructionID == MachineOutlinerThunk)
return;
// It's not a tail call, so we have to insert the return ourselves.
@@ -5357,7 +5421,8 @@ MachineBasicBlock::iterator AArch64Instr
}
// Are we saving the link register?
- if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
+ if (MInfo.CallConstructionID == MachineOutlinerNoLRSave ||
+ MInfo.CallConstructionID == MachineOutlinerThunk) {
// No, so just insert the call.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
.addGlobalAddress(M.getNamedValue(MF.getName())));
Added: llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll?rev=333015&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll Tue May 22 12:11:06 2018
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -enable-machine-outliner -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-pc-linux-gnu"
+
+declare i32 @thunk_called_fn(i32, i32, i32, i32)
+
+define i32 @a() {
+; CHECK-LABEL: a:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: add w0, w0, #8 // =8
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+ %cx = add i32 %call, 8
+ ret i32 %cx
+}
+
+define i32 @b() {
+; CHECK-LABEL: b:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: add w0, w0, #88 // =88
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+ %cx = add i32 %call, 88
+ ret i32 %cx
+}
+
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w0, wzr, #0x1
+; CHECK-NEXT: orr w1, wzr, #0x2
+; CHECK-NEXT: orr w2, wzr, #0x3
+; CHECK-NEXT: orr w3, wzr, #0x4
+; CHECK-NEXT: b thunk_called_fn
Modified: llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir?rev=333015&r1=333014&r2=333015&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/machine-outliner.mir Tue May 22 12:11:06 2018
@@ -124,6 +124,8 @@ body: |
$w17 = ORRWri $wzr, 1
$w17 = ORRWri $wzr, 1
BL @baz, implicit-def dead $lr, implicit $sp
+ $w17 = ORRWri $wzr, 1
+ $w17 = ORRWri $wzr, 1
$w17 = ORRWri $wzr, 2
BL @baz, implicit-def dead $lr, implicit $sp
$w17 = ORRWri $wzr, 1
@@ -131,6 +133,8 @@ body: |
$w17 = ORRWri $wzr, 1
$w17 = ORRWri $wzr, 1
BL @baz, implicit-def dead $lr, implicit $sp
+ $w17 = ORRWri $wzr, 1
+ $w17 = ORRWri $wzr, 1
$w8 = ORRWri $wzr, 0
bb.2:
More information about the llvm-commits
mailing list