[llvm] 8f7f2c4 - Revert "[AArch64] Homogeneous Prolog and Epilog Size Optimization"
Puyan Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 1 23:34:41 PST 2021
Author: Puyan Lotfi
Date: 2021-02-02T02:33:44-05:00
New Revision: 8f7f2c4211caf724e8319976968995bec1894b96
URL: https://github.com/llvm/llvm-project/commit/8f7f2c4211caf724e8319976968995bec1894b96
DIFF: https://github.com/llvm/llvm-project/commit/8f7f2c4211caf724e8319976968995bec1894b96.diff
LOG: Revert "[AArch64] Homogeneous Prolog and Epilog Size Optimization"
This reverts commit 0426be3df6180747bd68706db87a70580f064f0f.
Reverting due to some expensive-checks failures in tests.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64.h
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/lib/Target/AArch64/CMakeLists.txt
Removed:
llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-bad-outline.mir
llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-frame-tail.ll
llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll
llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index e304ce94f439..d2170a99e0a2 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -42,7 +42,6 @@ FunctionPass *createAArch64SLSHardeningPass();
FunctionPass *createAArch64IndirectThunks();
FunctionPass *createAArch64SpeculationHardeningPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
-ModulePass *createAArch64LowerHomogeneousPrologEpilogPass();
FunctionPass *createAArch64SIMDInstrOptPass();
ModulePass *createAArch64PromoteConstantPass();
FunctionPass *createAArch64ConditionOptimizerPass();
@@ -80,7 +79,6 @@ void initializeAArch64ExpandPseudoPass(PassRegistry&);
void initializeAArch64SLSHardeningPass(PassRegistry&);
void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
-void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index acfd1785d102..f5847cfa8fa9 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -179,11 +179,6 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
-cl::opt<bool> EnableHomogeneousPrologEpilog(
- "homogeneous-prolog-epilog", cl::init(false), cl::ZeroOrMore, cl::Hidden,
- cl::desc("Emit homogeneous prologue and epilogue for the size "
- "optimization (default = off)"));
-
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// Returns the argument pop size.
@@ -218,47 +213,6 @@ static uint64_t getArgumentPopSize(MachineFunction &MF,
return ArgumentPopSize;
}
-static bool produceCompactUnwindFrame(MachineFunction &MF);
-static bool needsWinCFI(const MachineFunction &MF);
-static StackOffset getSVEStackSize(const MachineFunction &MF);
-
-/// Returns true if a homogeneous prolog or epilog code can be emitted
-/// for the size optimization. If possible, a frame helper call is injected.
-/// When Exit block is given, this check is for epilog.
-bool AArch64FrameLowering::homogeneousPrologEpilog(
- MachineFunction &MF, MachineBasicBlock *Exit) const {
- if (!MF.getFunction().hasMinSize())
- return false;
- if (!EnableHomogeneousPrologEpilog)
- return false;
- if (ReverseCSRRestoreSeq)
- return false;
- if (EnableRedZone)
- return false;
-
- // TODO: Windows is not supported yet.
- if (needsWinCFI(MF))
- return false;
- // TODO: SVE is not supported yet.
- if (getSVEStackSize(MF))
- return false;
-
- // Bail on stack adjustment needed on return for simplicity.
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))
- return false;
- if (Exit && getArgumentPopSize(MF, *Exit))
- return false;
-
- return true;
-}
-
-/// Returns true if CSRs should be paired.
-bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
- return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
-}
-
/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exception here are vector stores/loads which cannot encode any
@@ -651,8 +605,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- if (homogeneousPrologEpilog(MF))
- return false;
if (AFI->getLocalStackSize() == 0)
return false;
@@ -1196,16 +1148,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
- bool HomPrologEpilog = homogeneousPrologEpilog(MF);
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
- } else if (HomPrologEpilog) {
- // Stack has been already adjusted.
- NumBytes -= PrologueSaveSize;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
@@ -1233,20 +1181,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
- if (HomPrologEpilog) {
- auto Prolog = MBBI;
- --Prolog;
- assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
- Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
- } else {
- // Issue sub fp, sp, FPOffset or
- // mov fp,sp when FPOffset is zero.
- // Note: All stores of callee-saved registers are marked as "FrameSetup".
- // This code marks the instruction(s) that set the FP also.
- emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
- StackOffset::getFixed(FPOffset), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- }
+ // Issue sub fp, sp, FPOffset or
+ // mov fp,sp when FPOffset is zero.
+ // Note: All stores of callee-saved registers are marked as "FrameSetup".
+ // This code marks the instruction(s) that set the FP also.
+ emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
+ StackOffset::getFixed(FPOffset), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
@@ -1674,25 +1615,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// function.
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
- if (homogeneousPrologEpilog(MF, &MBB)) {
- assert(!NeedsWinCFI);
- auto LastPopI = MBB.getFirstTerminator();
- if (LastPopI != MBB.begin()) {
- auto HomogeneousEpilog = std::prev(LastPopI);
- if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
- LastPopI = HomogeneousEpilog;
- }
-
- // Adjust local stack
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(-AFI->getLocalStackSize()), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI);
-
- // SP has been already adjusted while restoring callee save regs.
- // We've bailed-out the case with adjusting SP for arguments.
- assert(AfterCSRPopSize == 0);
- return;
- }
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
@@ -2411,22 +2333,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(AArch64::X18);
}
- if (homogeneousPrologEpilog(MF)) {
- auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
- .setMIFlag(MachineInstr::FrameSetup);
-
- for (auto &RPI : RegPairs) {
- MIB.addReg(RPI.Reg1);
- MIB.addReg(RPI.Reg2);
-
- // Update register live in.
- if (!MRI.isReserved(RPI.Reg1))
- MBB.addLiveIn(RPI.Reg1);
- if (!MRI.isReserved(RPI.Reg2))
- MBB.addLiveIn(RPI.Reg2);
- }
- return true;
- }
for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
@@ -2622,14 +2528,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
for (const RegPairInfo &RPI : reverse(RegPairs))
if (!RPI.isScalable())
EmitMI(RPI);
- } else if (homogeneousPrologEpilog(MF, &MBB)) {
- auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Epilog))
- .setMIFlag(MachineInstr::FrameDestroy);
- for (auto &RPI : RegPairs) {
- MIB.addReg(RPI.Reg1, RegState::Define);
- MIB.addReg(RPI.Reg2, RegState::Define);
- }
- return true;
} else
for (const RegPairInfo &RPI : RegPairs)
if (!RPI.isScalable())
@@ -2699,7 +2597,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
- if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
+ if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
!SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
@@ -2778,7 +2676,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
// store the pair.
- if (producePairRegisters(MF))
+ if (produceCompactUnwindFrame(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPR;
}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index c5b4ad48ddd8..80079a9d9836 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -124,16 +124,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
SmallVectorImpl<int> &ObjectsToAllocate) const override;
private:
- /// Returns true if a homogeneous prolog or epilog code can be emitted
- /// for the size optimization. If so, HOM_Prolog/HOM_Epilog pseudo
- /// instructions are emitted in place. When Exit block is given, this check is
- /// for epilog.
- bool homogeneousPrologEpilog(MachineFunction &MF,
- MachineBasicBlock *Exit = nullptr) const;
-
- /// Returns true if CSRs should be paired.
- bool producePairRegisters(MachineFunction &MF) const;
-
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
uint64_t StackBumpBytes) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index e5e75befd9cb..171d3dbaa814 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3896,14 +3896,6 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
Sched<[]>;
}
-// Pseudo instructions for homogeneous prolog/epilog
-let isPseudo = 1 in {
- // Save CSRs in order, {FPOffset}
- def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
- // Restore CSRs in order
- def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
-}
-
//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
deleted file mode 100644
index d032748f60c6..000000000000
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ /dev/null
@@ -1,613 +0,0 @@
-//===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that lowers homogeneous prolog/epilog instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
-#include "MCTargetDesc/AArch64InstPrinter.h"
-#include "Utils/AArch64BaseInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include <sstream>
-
-using namespace llvm;
-
-#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \
- "AArch64 homogeneous prolog/epilog lowering pass"
-
-cl::opt<int> FrameHelperSizeThreshold(
- "frame-helper-size-threshold", cl::init(2), cl::Hidden,
- cl::desc("The minimum number of instructions that are outlined in a frame "
- "helper (default = 2)"));
-
-namespace {
-
-class AArch64LowerHomogeneousPE {
-public:
- const AArch64InstrInfo *TII;
-
- AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
- : M(M), MMI(MMI) {}
-
- bool run();
- bool runOnMachineFunction(MachineFunction &Fn);
-
-private:
- Module *M;
- MachineModuleInfo *MMI;
-
- bool runOnMBB(MachineBasicBlock &MBB);
- bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
-
- /// Lower a HOM_Prolog pseudo instruction into a helper call
- /// or a sequence of homogeneous stores.
- /// When an fp setup follows, it can be optimized.
- bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
- /// Lower a HOM_Epilog pseudo instruction into a helper call
- /// or a sequence of homogeneous loads.
- /// When a return follows, it can be optimized.
- bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
-};
-
-class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
-public:
- static char ID;
-
- AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
- initializeAArch64LowerHomogeneousPrologEpilogPass(
- *PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addPreserved<MachineModuleInfoWrapperPass>();
- AU.setPreservesAll();
- ModulePass::getAnalysisUsage(AU);
- }
- bool runOnModule(Module &M) override;
-
- StringRef getPassName() const override {
- return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
- }
-};
-
-} // end anonymous namespace
-
-char AArch64LowerHomogeneousPrologEpilog::ID = 0;
-
-INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
- "aarch64-lower-homogeneous-prolog-epilog",
- AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
-
-bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- MachineModuleInfo *MMI =
- &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
- return AArch64LowerHomogeneousPE(&M, MMI).run();
-}
-
-bool AArch64LowerHomogeneousPE::run() {
- bool Changed = false;
- for (auto &F : *M) {
- if (F.empty())
- continue;
-
- MachineFunction *MF = MMI->getMachineFunction(F);
- if (!MF)
- continue;
- Changed |= runOnMachineFunction(*MF);
- }
-
- return Changed;
-}
-enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail };
-
-/// Return a frame helper name with the given CSRs and the helper type.
-/// For instance, a prolog helper that saves x19 and x20 is named as
-/// OUTLINED_FUNCTION_PROLOG_x19x20.
-static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
- FrameHelperType Type, unsigned FpOffset) {
- std::ostringstream RegStream;
- switch (Type) {
- case FrameHelperType::Prolog:
- RegStream << "OUTLINED_FUNCTION_PROLOG_";
- break;
- case FrameHelperType::PrologFrame:
- RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
- break;
- case FrameHelperType::Epilog:
- RegStream << "OUTLINED_FUNCTION_EPILOG_";
- break;
- case FrameHelperType::EpilogTail:
- RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
- break;
- }
-
- for (auto Reg : Regs)
- RegStream << AArch64InstPrinter::getRegisterName(Reg);
-
- return RegStream.str();
-}
-
-/// Create a Function for the unique frame helper with the given name.
-/// Return a newly created MachineFunction with an empty MachineBasicBlock.
-static MachineFunction &createFrameHelperMachineFunction(Module *M,
- MachineModuleInfo *MMI,
- StringRef Name) {
- LLVMContext &C = M->getContext();
- Function *F = M->getFunction(Name);
- assert(F == nullptr && "Function has been created before");
- F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
- Function::ExternalLinkage, Name, M);
- assert(F && "Function was null!");
-
- // Use ODR linkage to avoid duplication.
- F->setLinkage(GlobalValue::LinkOnceODRLinkage);
- F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
-
- // Set no-opt/minsize, so we don't insert padding between outlined
- // functions.
- F->addFnAttr(Attribute::OptimizeNone);
- F->addFnAttr(Attribute::NoInline);
- F->addFnAttr(Attribute::MinSize);
- F->addFnAttr(Attribute::Naked);
-
- MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
- // Remove unnecessary register liveness and set NoVRegs.
- MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
- MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
- MF.getRegInfo().freezeReservedRegs(MF);
-
- // Create entry block.
- BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
- IRBuilder<> Builder(EntryBB);
- Builder.CreateRetVoid();
-
- // Insert the new block into the function.
- MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
- MF.insert(MF.begin(), MBB);
-
- return MF;
-}
-
-/// Emit a store-pair instruction for frame-setup.
-static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator Pos,
- const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
- int Offset, bool IsPreDec) {
- bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
- assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
- unsigned Opc;
- if (IsPreDec)
- Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
- else
- Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
-
- MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
- if (IsPreDec)
- MIB.addDef(AArch64::SP);
- MIB.addReg(Reg2)
- .addReg(Reg1)
- .addReg(AArch64::SP)
- .addImm(Offset)
- .setMIFlag(MachineInstr::FrameSetup);
-}
-
-/// Emit a load-pair instruction for frame-destroy.
-static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator Pos,
- const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
- int Offset, bool IsPostDec) {
- bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
- assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
- unsigned Opc;
- if (IsPostDec)
- Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
- else
- Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
-
- MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
- if (IsPostDec)
- MIB.addDef(AArch64::SP);
- MIB.addReg(Reg2)
- .addReg(Reg1)
- .addReg(AArch64::SP)
- .addImm(Offset)
- .setMIFlag(MachineInstr::FrameDestroy);
-}
-
-/// Return a unique function if a helper can be formed with the given Regs
-/// and frame type.
-/// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
-/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
-/// stp x20, x19, [sp, #16]
-/// ret
-///
-/// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
-/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
-/// stp x20, x19, [sp, #16]
-/// add fp, sp, #32
-/// ret
-///
-/// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
-/// mov x16, x30
-/// ldp x29, x30, [sp, #32]
-/// ldp x20, x19, [sp, #16]
-/// ldp x22, x21, [sp], #48
-/// ret x16
-///
-/// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
-/// ldp x29, x30, [sp, #32]
-/// ldp x20, x19, [sp, #16]
-/// ldp x22, x21, [sp], #48
-/// ret
-/// @param M module
-/// @param MMI machine module info
-/// @param Regs callee save regs that the helper will handle
-/// @param Type frame helper type
-/// @return a helper function
-static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
- SmallVectorImpl<unsigned> &Regs,
- FrameHelperType Type,
- unsigned FpOffset = 0) {
- assert(Regs.size() >= 2);
- auto Name = getFrameHelperName(Regs, Type, FpOffset);
- auto *F = M->getFunction(Name);
- if (F)
- return F;
-
- auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
- MachineBasicBlock &MBB = *MF.begin();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
-
- int Size = (int)Regs.size();
- switch (Type) {
- case FrameHelperType::Prolog:
- case FrameHelperType::PrologFrame: {
- // Compute the remaining SP adjust beyond FP/LR.
- auto LRIdx = std::distance(
- Regs.begin(), std::find(Regs.begin(), Regs.end(), AArch64::LR));
-
- // If the register stored to the lowest address is not LR, we must subtract
- // more from SP here.
- if (LRIdx != Size - 2) {
- assert(Regs[Size - 2] != AArch64::LR);
- emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
- LRIdx - Size + 2, true);
- }
-
- // Store CSRs in the reverse order.
- for (int I = Size - 3; I >= 0; I -= 2) {
- // FP/LR has been stored at call-site.
- if (Regs[I - 1] == AArch64::LR)
- continue;
- emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
- false);
- }
- if (Type == FrameHelperType::PrologFrame)
- BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
- .addDef(AArch64::FP)
- .addUse(AArch64::SP)
- .addImm(FpOffset)
- .addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
-
- BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
- .addReg(AArch64::LR);
- break;
- }
- case FrameHelperType::Epilog:
- case FrameHelperType::EpilogTail:
- if (Type == FrameHelperType::Epilog)
- // Stash LR to X16
- BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
- .addDef(AArch64::X16)
- .addReg(AArch64::XZR)
- .addUse(AArch64::LR)
- .addImm(0);
-
- for (int I = 0; I < Size - 2; I += 2)
- emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
- false);
- // Restore the last CSR with post-increment of SP.
- emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
- true);
-
- BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
- .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
- break;
- }
-
- return M->getFunction(Name);
-}
-
-/// This function checks if a frame helper should be used for
-/// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
-/// @param MBB machine basic block
-/// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog
-/// @param Regs callee save registers that are saved or restored.
-/// @param Type frame helper type
-/// @return True if a use of helper is qualified.
-static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &NextMBBI,
- SmallVectorImpl<unsigned> &Regs,
- FrameHelperType Type) {
- const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
- auto RegCount = Regs.size();
- assert(RegCount > 0 && (RegCount % 2 == 0));
- // # of instructions that will be outlined.
- int InstCount = RegCount / 2;
-
- // Do not use a helper call when not saving LR.
- if (std::find(Regs.begin(), Regs.end(), AArch64::LR) == Regs.end())
- return false;
-
- switch (Type) {
- case FrameHelperType::Prolog:
- // Prolog helper cannot save FP/LR.
- InstCount--;
- break;
- case FrameHelperType::PrologFrame: {
- // Effectively no change in InstCount since FpAdjustment is included.
- break;
- }
- case FrameHelperType::Epilog:
- // Bail-out if X16 is live across the epilog helper because it is used in
- // the helper to handle X30.
- for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
- if (NextMI->readsRegister(AArch64::W16, TRI))
- return false;
- }
- // Epilog may not be in the last block. Check the liveness in successors.
- for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
- if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
- return false;
- }
- // No change in InstCount for the regular epilog case.
- break;
- case FrameHelperType::EpilogTail: {
- // EpilogTail helper includes the caller's return.
- if (NextMBBI == MBB.end())
- return false;
- if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
- return false;
- InstCount++;
- break;
- }
- }
-
- return InstCount >= FrameHelperSizeThreshold;
-}
-
-/// Lower a HOM_Epilog pseudo instruction into a helper call while
-/// creating the helper on demand. Or emit a sequence of loads in place when not
-/// using a helper call.
-///
-/// 1. With a helper including ret
-/// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI
-/// ret ; NextMBBI
-/// =>
-/// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
-/// ... ; NextMBBI
-///
-/// 2. With a helper
-/// HOM_Epilog x30, x29, x19, x20, x21, x22
-/// =>
-/// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
-///
-/// 3. Without a helper
-/// HOM_Epilog x30, x29, x19, x20, x21, x22
-/// =>
-/// ldp x29, x30, [sp, #32]
-/// ldp x20, x19, [sp, #16]
-/// ldp x22, x21, [sp], #48
-bool AArch64LowerHomogeneousPE::lowerEpilog(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
- auto &MF = *MBB.getParent();
- MachineInstr &MI = *MBBI;
-
- DebugLoc DL = MI.getDebugLoc();
- SmallVector<unsigned, 8> Regs;
- for (auto &MO : MI.operands())
- if (MO.isReg())
- Regs.push_back(MO.getReg());
- int Size = (int)Regs.size();
- if (Size == 0)
- return false;
- // Registers are in pair.
- assert(Size % 2 == 0);
- assert(MI.getOpcode() == AArch64::HOM_Epilog);
-
- auto Return = NextMBBI;
- if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
- // When MBB ends with a return, emit a tail-call to the epilog helper
- auto *EpilogTailHelper =
- getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
- .addGlobalAddress(EpilogTailHelper)
- .addImm(0)
- .setMIFlag(MachineInstr::FrameDestroy)
- .copyImplicitOps(MI)
- .copyImplicitOps(*Return);
- NextMBBI = std::next(Return);
- Return->removeFromParent();
- } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
- FrameHelperType::Epilog)) {
- // The default epilog helper case.
- auto *EpilogHelper =
- getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
- .addGlobalAddress(EpilogHelper)
- .setMIFlag(MachineInstr::FrameDestroy)
- .copyImplicitOps(MI);
- } else {
- // Fall back to no-helper.
- for (int I = 0; I < Size - 2; I += 2)
- emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
- // Restore the last CSR with post-increment of SP.
- emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
- }
-
- MBBI->removeFromParent();
- return true;
-}
-
-/// Lower a HOM_Prolog pseudo instruction into a helper call while
-/// creating the helper on demand. Or emit a sequence of stores in place when
-/// not using a helper call.
-///
-/// 1. With a helper including frame-setup
-/// HOM_Prolog x30, x29, x19, x20, x21, x22, 32
-/// =>
-/// stp x29, x30, [sp, #-16]!
-/// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
-///
-/// 2. With a helper
-/// HOM_Prolog x30, x29, x19, x20, x21, x22
-/// =>
-/// stp x29, x30, [sp, #-16]!
-/// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
-///
-/// 3. Without a helper
-/// HOM_Prolog x30, x29, x19, x20, x21, x22
-/// =>
-/// stp x22, x21, [sp, #-48]!
-/// stp x20, x19, [sp, #16]
-/// stp x29, x30, [sp, #32]
-bool AArch64LowerHomogeneousPE::lowerProlog(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
- auto &MF = *MBB.getParent();
- MachineInstr &MI = *MBBI;
-
- DebugLoc DL = MI.getDebugLoc();
- SmallVector<unsigned, 8> Regs;
- int LRIdx = 0;
- Optional<int> FpOffset;
- for (auto &MO : MI.operands()) {
- if (MO.isReg()) {
- if (MO.getReg() == AArch64::LR)
- LRIdx = Regs.size();
- Regs.push_back(MO.getReg());
- } else if (MO.isImm()) {
- FpOffset = MO.getImm();
- }
- }
- int Size = (int)Regs.size();
- if (Size == 0)
- return false;
- // Allow compact unwind case only for now.
- assert(Size % 2 == 0);
- assert(MI.getOpcode() == AArch64::HOM_Prolog);
-
- if (FpOffset &&
- shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
- // FP/LR is stored at the top of stack before the prolog helper call.
- emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
- auto *PrologFrameHelper = getOrCreateFrameHelper(
- M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
- .addGlobalAddress(PrologFrameHelper)
- .setMIFlag(MachineInstr::FrameSetup)
- .copyImplicitOps(MI)
- .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
- .addReg(AArch64::SP, RegState::Implicit);
- } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
- FrameHelperType::Prolog)) {
- // FP/LR is stored at the top of stack before the prolog helper call.
- emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
- auto *PrologHelper =
- getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
- .addGlobalAddress(PrologHelper)
- .setMIFlag(MachineInstr::FrameSetup)
- .copyImplicitOps(MI);
- } else {
- // Fall back to no-helper.
- emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
- for (int I = Size - 3; I >= 0; I -= 2)
- emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
- if (FpOffset) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
- .addDef(AArch64::FP)
- .addUse(AArch64::SP)
- .addImm(*FpOffset)
- .addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
- }
- }
-
- MBBI->removeFromParent();
- return true;
-}
-
-/// Process each machine instruction
-/// @param MBB machine basic block
-/// @param MBBI current instruction iterator
-/// @param NextMBBI next instruction iterator which can be updated
-/// @return True when IR is changed.
-bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
- MachineInstr &MI = *MBBI;
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- default:
- break;
- case AArch64::HOM_Prolog:
- return lowerProlog(MBB, MBBI, NextMBBI);
- case AArch64::HOM_Epilog:
- return lowerEpilog(MBB, MBBI, NextMBBI);
- }
- return false;
-}
-
-bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
- bool Modified = false;
-
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- while (MBBI != E) {
- MachineBasicBlock::iterator NMBBI = std::next(MBBI);
- Modified |= runOnMI(MBB, MBBI, NMBBI);
- MBBI = NMBBI;
- }
-
- return Modified;
-}
-
-bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- bool Modified = false;
- for (auto &MBB : MF)
- Modified |= runOnMBB(MBB);
- return Modified;
-}
-
-ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
- return new AArch64LowerHomogeneousPrologEpilog();
-}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index adc1a6246185..bec1758a931b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -161,8 +161,6 @@ static cl::opt<bool>
cl::desc("Enable the AAcrh64 branch target pass"),
cl::init(true));
-extern cl::opt<bool> EnableHomogeneousPrologEpilog;
-
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -199,7 +197,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64SLSHardeningPass(*PR);
initializeAArch64StackTaggingPass(*PR);
initializeAArch64StackTaggingPreRAPass(*PR);
- initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -637,9 +634,6 @@ void AArch64PassConfig::addPostRegAlloc() {
}
void AArch64PassConfig::addPreSched2() {
- // Lower homogeneous frame instructions
- if (EnableHomogeneousPrologEpilog)
- addPass(createAArch64LowerHomogeneousPrologEpilogPass());
// Expand some pseudo instructions to allow proper scheduling.
addPass(createAArch64ExpandPseudoPass());
// Use load/store pair instructions when possible.
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index d9fd81c22ca9..0e9503baf180 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -59,7 +59,6 @@ add_llvm_target(AArch64CodeGen
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
AArch64LoadStoreOptimizer.cpp
- AArch64LowerHomogeneousPrologEpilog.cpp
AArch64MachineFunctionInfo.cpp
AArch64MacroFusion.cpp
AArch64MCInstLower.cpp
diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-bad-outline.mir b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-bad-outline.mir
deleted file mode 100644
index 94c27e1bf1aa..000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-bad-outline.mir
+++ /dev/null
@@ -1,40 +0,0 @@
-# RUN: llc -mtriple=arm64-applie-ios7.0 -start-before=aarch64-lower-homogeneous-prolog-epilog -homogeneous-prolog-epilog %s -o - | FileCheck %s
-#
-# This test ensure no outlined epilog is formed when X16 is live across the helper.
---- |
- @FuncPtr = local_unnamed_addr global i32 (i32)* null, align 8
-
- define i32 @_Z3fooi(i32) minsize "frame-pointer"="all" {
- ret i32 0
- }
-
- declare i32 @_Z3gooii(i32, i32)
-...
----
-name: _Z3fooi
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $w0, $lr, $x19, $x20
- successors: %bb.1
- frame-setup HOM_Prolog $lr, $fp, $x19, $x20, 16
- frame-setup CFI_INSTRUCTION def_cfa $w29, 16
- frame-setup CFI_INSTRUCTION offset $w30, -8
- frame-setup CFI_INSTRUCTION offset $w29, -16
- frame-setup CFI_INSTRUCTION offset $w19, -24
- frame-setup CFI_INSTRUCTION offset $w20, -32
- $w19 = nsw ADDWri $w0, 1, 0
- $w1 = ORRWrr $wzr, $w0
- $w0 = ORRWrr $wzr, $w19
- BL @_Z3gooii, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit $w1, implicit-def $sp, implicit-def $w0
- $x8 = ADRP target-flags(aarch64-page) @FuncPtr
- $x16 = LDRXui killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @FuncPtr
- $w0 = nsw ADDWrr renamable $w0, killed renamable $w19
- $lr, $fp, $x19, $x20 = frame-destroy HOM_Epilog
- B %bb.1
-
- bb.1:
- liveins: $w0, $x16
- TCRETURNri killed renamable $x16, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
-# CHECK: _OUTLINED_FUNCTION_PROLOG_FRAME16_x30x29x19x20:
-# CHECK-NOT: _OUTLINED_FUNCTION_EPILOG_x30x29x19x20:
diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-frame-tail.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-frame-tail.ll
deleted file mode 100644
index 32cc850dfa73..000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-frame-tail.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -homogeneous-prolog-epilog | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -homogeneous-prolog-epilog | FileCheck %s --check-prefixes=CHECK-LINUX
-
-; CHECK-LABEL: __Z3foofffi:
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: bl _OUTLINED_FUNCTION_PROLOG_FRAME48_x30x29x19x20d8d9d10d11
-; CHECK: bl __Z3goof
-; CHECK: bl __Z3goof
-; CHECK: b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20d8d9d10d11
-
-; CHECK-LINUX-LABEL: _Z3foofffi:
-; CHECK-LINUX: stp x29, x30, [sp, #-32]!
-; CHECK-LINUX-NEXT: bl OUTLINED_FUNCTION_PROLOG_FRAME32_x19x20x30x29d8d9d10d11
-; CHECK-LINUX: bl _Z3goof
-; CHECK-LINUX: bl _Z3goof
-; CHECK-LINUX: b OUTLINED_FUNCTION_EPILOG_TAIL_x19x20x30x29d8d9d10d11
-
-define float @_Z3foofffi(float %b, float %x, float %y, i32 %z) ssp minsize "frame-pointer"="non-leaf" {
-entry:
- %inc = fadd float %b, 1.000000e+00
- %add = fadd float %inc, %x
- %add1 = fadd float %add, %y
- %conv = sitofp i32 %z to float
- %sub = fsub float %add1, %conv
- %dec = add nsw i32 %z, -1
- %call = tail call float @_Z3goof(float %inc) #2
- %call2 = tail call float @_Z3goof(float %sub) #2
- %add3 = fadd float %call, %call2
- %mul = fmul float %inc, %add3
- %add4 = fadd float %sub, %mul
- %conv5 = sitofp i32 %dec to float
- %sub6 = fsub float %add4, %conv5
- ret float %sub6
-}
-
-; CHECK-LABEL: _Z3zoov:
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK: bl __Z3hoo
-; CHECK: b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29
-
-define i32 @_Z3zoov() nounwind ssp minsize {
- %1 = tail call i32 @_Z3hoov() #2
- %2 = add nsw i32 %1, 1
- ret i32 %2
-}
-
-
-declare float @_Z3goof(float) nounwind ssp minsize
-declare i32 @_Z3hoov() nounwind ssp optsize
-
-; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_FRAME48_x30x29x19x20d8d9d10d11:
-; CHECK: stp d11, d10, [sp, #-48]!
-; CHECK-NEXT: stp d9, d8, [sp, #16]
-; CHECK-NEXT: stp x20, x19, [sp, #32]
-; CHECK-NEXT: add x29, sp, #48
-; CHECK-NEXT: ret
-
-; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20d8d9d10d11:
-; CHECK: ldp x29, x30, [sp, #48]
-; CHECK-NEXT: ldp x20, x19, [sp, #32]
-; CHECK-NEXT: ldp d9, d8, [sp, #16]
-; CHECK-NEXT: ldp d11, d10, [sp], #64
-; CHECK-NEXT: ret
-
-; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29:
-; CHECK: ldp x29, x30, [sp], #16
-; CHECK-NEXT: ret
-
-; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_PROLOG_FRAME32_x19x20x30x29d8d9d10d11:
-; CHECK-LINUX: stp d11, d10, [sp, #-32]!
-; CHECK-LINUX-NEXT: stp d9, d8, [sp, #16]
-; CHECK-LINUX-NEXT: stp x20, x19, [sp, #48]
-; CHECK-LINUX-NEXT: add x29, sp, #32
-; CHECK-LINUX-NEXT: ret
-
-; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_EPILOG_TAIL_x19x20x30x29d8d9d10d11:
-; CHECK-LINUX: ldp x20, x19, [sp, #48]
-; CHECK-LINUX-NEXT: ldp x29, x30, [sp, #32]
-; CHECK-LINUX-NEXT: ldp d9, d8, [sp, #16]
-; CHECK-LINUX-NEXT: ldp d11, d10, [sp], #64
-; CHECK-LINUX-NEXT: ret
-
-; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_EPILOG_TAIL_x30x29:
-; CHECK-LINUX: ldp x29, x30, [sp], #16
-; CHECK-LINUX-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll
deleted file mode 100644
index a5da39cb5435..000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -homogeneous-prolog-epilog -frame-helper-size-threshold=6 | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -homogeneous-prolog-epilog -frame-helper-size-threshold=6 | FileCheck %s --check-prefixes=CHECK-LINUX
-
-; CHECK-LABEL: __Z3foofffi:
-; CHECK: stp d11, d10, [sp, #-64]!
-; CHECK-NEXT: stp d9, d8, [sp, #16]
-; CHECK-NEXT: stp x20, x19, [sp, #32]
-; CHECK-NEXT: stp x29, x30, [sp, #48]
-; CHECK-NEXT: add x29, sp, #48
-; CHECK: bl __Z3goof
-; CHECK: bl __Z3goof
-; CHECK: ldp x29, x30, [sp, #48]
-; CHECK: ldp x20, x19, [sp, #32]
-; CHECK: ldp d9, d8, [sp, #16]
-; CHECK: ldp d11, d10, [sp], #64
-; CHECK: ret
-
-; CHECK-LINUX-LABEL: _Z3foofffi:
-; CHECK-LINUX: stp d11, d10, [sp, #-64]!
-; CHECK-LINUX-NEXT: stp d9, d8, [sp, #16]
-; CHECK-LINUX-NEXT: stp x29, x30, [sp, #32]
-; CHECK-LINUX-NEXT: stp x20, x19, [sp, #48]
-; CHECK-LINUX-NEXT: add x29, sp, #32
-; CHECK-LINUX: bl _Z3goof
-; CHECK-LINUX: bl _Z3goof
-; CHECK-LINUX: ldp x20, x19, [sp, #48]
-; CHECK-LINUX: ldp x29, x30, [sp, #32]
-; CHECK-LINUX: ldp d9, d8, [sp, #16]
-; CHECK-LINUX: ldp d11, d10, [sp], #64
-; CHECK-LINUX: ret
-
-define float @_Z3foofffi(float %b, float %x, float %y, i32 %z) uwtable ssp minsize "frame-pointer"="non-leaf" {
-entry:
- %inc = fadd float %b, 1.000000e+00
- %add = fadd float %inc, %x
- %add1 = fadd float %add, %y
- %conv = sitofp i32 %z to float
- %sub = fsub float %add1, %conv
- %dec = add nsw i32 %z, -1
- %call = tail call float @_Z3goof(float %inc) #2
- %call2 = tail call float @_Z3goof(float %sub) #2
- %add3 = fadd float %call, %call2
- %mul = fmul float %inc, %add3
- %add4 = fadd float %sub, %mul
- %conv5 = sitofp i32 %dec to float
- %sub6 = fsub float %add4, %conv5
- ret float %sub6
-}
-
-; CHECK-LABEL: __Z3zoov:
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK: bl __Z3hoo
-; CHECK: ldp x29, x30, [sp], #16
-; CHECK-NEXT: ret
-
-; CHECK-LINUX-LABEL: _Z3zoov:
-; CHECK-LINUX: stp x29, x30, [sp, #-16]!
-; CHECK-LINUX: bl _Z3hoo
-; CHECK-LINUX: ldp x29, x30, [sp], #16
-; CHECK-LINUX-NEXT: ret
-
-define i32 @_Z3zoov() nounwind ssp minsize {
- %1 = tail call i32 @_Z3hoov() #2
- %2 = add nsw i32 %1, 1
- ret i32 %2
-}
-
-
-declare float @_Z3goof(float) nounwind ssp minsize
-declare i32 @_Z3hoov() nounwind ssp minsize
diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
deleted file mode 100644
index 498bbb5bd69b..000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -homogeneous-prolog-epilog| FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -homogeneous-prolog-epilog | FileCheck %s --check-prefixes=CHECK-LINUX
-
-; CHECK-LABEL: __Z3hooii:
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
-; CHECK: bl __Z3gooi
-; CHECK: bl __Z3gooi
-; CHECK: bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
-; CHECK-NEXT: b __Z3gooi
-
-; CHECK-LINUX-LABEL: _Z3hooii:
-; CHECK-LINUX: stp x29, x30, [sp, #-48]!
-; CHECK-LINUX-NEXT: bl OUTLINED_FUNCTION_PROLOG_x19x20x21x22x30x29
-; CHECK-LINUX: bl _Z3gooi
-; CHECK-LINUX: bl _Z3gooi
-; CHECK-LINUX: bl OUTLINED_FUNCTION_EPILOG_x19x20x21x22x30x29
-; CHECK-LINUX-NEXT: b _Z3gooi
-
-define i32 @_Z3hooii(i32 %b, i32 %a) nounwind ssp minsize {
- %1 = tail call i32 @_Z3gooi(i32 %b)
- %2 = tail call i32 @_Z3gooi(i32 %a)
- %3 = add i32 %a, %b
- %4 = add i32 %3, %1
- %5 = add i32 %4, %2
- %6 = tail call i32 @_Z3gooi(i32 %5)
- ret i32 %6
-}
-
-declare i32 @_Z3gooi(i32);
-
-
-; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
-; CHECK: stp x22, x21, [sp, #-32]!
-; CHECK-NEXT: stp x20, x19, [sp, #16]
-; CHECK-NEXT: ret
-
-; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
-; CHECK: mov x16, x30
-; CHECK-NEXT: ldp x29, x30, [sp, #32]
-; CHECK-NEXT: ldp x20, x19, [sp, #16]
-; CHECK-NEXT: ldp x22, x21, [sp], #48
-; CHECK-NEXT: ret x16
-
-; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_PROLOG_x19x20x21x22x30x29:
-; CHECK-LINUX: stp x22, x21, [sp, #16]
-; CHECK-LINUX-NEXT: stp x20, x19, [sp, #32]
-; CHECK-LINUX-NEXT: ret
-
-; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_EPILOG_x19x20x21x22x30x29:
-; CHECK-LINUX: mov x16, x30
-; CHECK-LINUX-NEXT: ldp x20, x19, [sp, #32]
-; CHECK-LINUX-NEXT: ldp x22, x21, [sp, #16]
-; CHECK-LINUX-NEXT: ldp x29, x30, [sp], #48
-; CHECK-LINUX-NEXT: ret x16
More information about the llvm-commits
mailing list