[llvm] r309356 - [MachineOutliner] NFC: Split up getOutliningBenefit

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 27 20:21:59 PDT 2017


Author: paquette
Date: Thu Jul 27 20:21:58 2017
New Revision: 309356

URL: http://llvm.org/viewvc/llvm-project?rev=309356&view=rev
Log:
[MachineOutliner] NFC: Split up getOutliningBenefit

This is some more cleanup in preparation for actual
functional changes. It splits getOutliningBenefit into
two cost functions: getOutliningCallOverhead and
getOutliningFrameOverhead. These return, respectively,
the number of instructions required to call a specific
function and the number of instructions required to
construct a frame for that function. The actual
outlining benefit logic moves into the outliner, which
calls these functions.

The goal of refactoring getOutliningBenefit is to:

- Get us closer to getting rid of the IsTailCall flag

- Further split up "target-specific" things and
"general algorithm" things



Modified:
    llvm/trunk/include/llvm/Target/TargetInstrInfo.h
    llvm/trunk/lib/CodeGen/MachineOutliner.cpp
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.h

Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=309356&r1=309355&r2=309356&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
+++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Thu Jul 27 20:21:58 2017
@@ -55,7 +55,7 @@ class TargetRegisterInfo;
 class TargetSchedModel;
 class TargetSubtargetInfo;
 
-template<class T> class SmallVectorImpl;
+template <class T> class SmallVectorImpl;
 
 //---------------------------------------------------------------------------
 ///
@@ -66,8 +66,7 @@ public:
   TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u,
                   unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u)
       : CallFrameSetupOpcode(CFSetupOpcode),
-        CallFrameDestroyOpcode(CFDestroyOpcode),
-        CatchRetOpcode(CatchRetOpcode),
+        CallFrameDestroyOpcode(CFDestroyOpcode), CatchRetOpcode(CatchRetOpcode),
         ReturnOpcode(ReturnOpcode) {}
   TargetInstrInfo(const TargetInstrInfo &) = delete;
   TargetInstrInfo &operator=(const TargetInstrInfo &) = delete;
@@ -79,8 +78,7 @@ public:
 
   /// Given a machine instruction descriptor, returns the register
   /// class constraint for OpNum, or NULL.
-  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID,
-                                         unsigned OpNum,
+  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                          const TargetRegisterInfo *TRI,
                                          const MachineFunction &MF) const;
 
@@ -139,8 +137,7 @@ protected:
   /// the fixed result pair is equal to or equivalent to the source pair of
   /// indices: (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that
   /// the pairs (x,y) and (y,x) are equivalent.
-  static bool fixCommutedOpIndices(unsigned &ResultIdx1,
-                                   unsigned &ResultIdx2,
+  static bool fixCommutedOpIndices(unsigned &ResultIdx1, unsigned &ResultIdx2,
                                    unsigned CommutableOpIdx1,
                                    unsigned CommutableOpIdx2);
 
@@ -164,7 +161,7 @@ public:
   /// Returns true if the argument is a frame pseudo instruction.
   bool isFrameInstr(const MachineInstr &I) const {
     return I.getOpcode() == getCallFrameSetupOpcode() ||
-      I.getOpcode() == getCallFrameDestroyOpcode();
+           I.getOpcode() == getCallFrameDestroyOpcode();
   }
 
   /// Returns true if the argument is a frame setup pseudo instruction.
@@ -191,7 +188,8 @@ public:
   /// prior to the pair.
   int64_t getFrameTotalSize(const MachineInstr &I) const {
     if (isFrameSetup(I)) {
-      assert(I.getOperand(1).getImm() >= 0 && "Frame size must not be negative");
+      assert(I.getOperand(1).getImm() >= 0 &&
+             "Frame size must not be negative");
       return getFrameSize(I) + I.getOperand(1).getImm();
     }
     return getFrameSize(I);
@@ -211,9 +209,8 @@ public:
   /// destination. e.g. X86::MOVSX64rr32. If this returns true, then it's
   /// expected the pre-extension value is available as a subreg of the result
   /// register. This also returns the sub-register index in SubIdx.
-  virtual bool isCoalescableExtInstr(const MachineInstr &MI,
-                                     unsigned &SrcReg, unsigned &DstReg,
-                                     unsigned &SubIdx) const {
+  virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+                                     unsigned &DstReg, unsigned &SubIdx) const {
     return false;
   }
 
@@ -315,9 +312,7 @@ public:
   /// MachineSink determines on its own whether the instruction is safe to sink;
   /// this gives the target a hook to override the default behavior with regards
   /// to which instructions should be sunk.
-  virtual bool shouldSink(const MachineInstr &MI) const {
-    return true;
-  }
+  virtual bool shouldSink(const MachineInstr &MI) const { return true; }
 
   /// Re-issue the specified 'original' instruction at the
   /// specific location targeting a new destination register.
@@ -456,9 +451,8 @@ public:
   /// \note The generic implementation does not provide any support for
   /// MI.isExtractSubregLike(). In other words, one has to override
   /// getExtractSubregLikeInputs for target specific instructions.
-  bool
-  getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx,
-                         RegSubRegPairAndIdx &InputReg) const;
+  bool getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx,
+                              RegSubRegPairAndIdx &InputReg) const;
 
   /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI
   /// and \p DefIdx.
@@ -476,10 +470,9 @@ public:
   /// \note The generic implementation does not provide any support for
   /// MI.isInsertSubregLike(). In other words, one has to override
   /// getInsertSubregLikeInputs for target specific instructions.
-  bool
-  getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx,
-                        RegSubRegPair &BaseReg,
-                        RegSubRegPairAndIdx &InsertedReg) const;
+  bool getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx,
+                             RegSubRegPair &BaseReg,
+                             RegSubRegPairAndIdx &InsertedReg) const;
 
   /// Return true if two machine instructions would produce identical values.
   /// By default, this is only true when the two instructions
@@ -625,8 +618,8 @@ public:
                                      MachineBasicBlock *DestBB,
                                      const DebugLoc &DL,
                                      int *BytesAdded = nullptr) const {
-    return insertBranch(MBB, DestBB, nullptr,
-                        ArrayRef<MachineOperand>(), DL, BytesAdded);
+    return insertBranch(MBB, DestBB, nullptr, ArrayRef<MachineOperand>(), DL,
+                        BytesAdded);
   }
 
   /// Analyze the loop code, return true if it cannot be understood. Upon
@@ -641,8 +634,8 @@ public:
   /// finished.  Return the value/register of the new loop count.  We need
   /// this function when peeling off one or more iterations of a loop. This
   /// function assumes the nth iteration is peeled first.
-  virtual unsigned reduceLoopCount(MachineBasicBlock &MBB,
-                                   MachineInstr *IndVar, MachineInstr &Cmp,
+  virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
+                                   MachineInstr &Cmp,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    SmallVectorImpl<MachineInstr *> &PrevInsts,
                                    unsigned Iter, unsigned MaxIter) const {
@@ -667,10 +660,9 @@ public:
   /// of the specified basic block, where the probability of the instructions
   /// being executed is given by Probability, and Confidence is a measure
   /// of our confidence that it will be properly predicted.
-  virtual
-  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
-                           unsigned ExtraPredCycles,
-                           BranchProbability Probability) const {
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                                   unsigned ExtraPredCycles,
+                                   BranchProbability Probability) const {
     return false;
   }
 
@@ -680,12 +672,11 @@ public:
   /// predicates, where the probability of the true path being taken is given
   /// by Probability, and Confidence is a measure of our confidence that it
   /// will be properly predicted.
-  virtual bool
-  isProfitableToIfCvt(MachineBasicBlock &TMBB,
-                      unsigned NumTCycles, unsigned ExtraTCycles,
-                      MachineBasicBlock &FMBB,
-                      unsigned NumFCycles, unsigned ExtraFCycles,
-                      BranchProbability Probability) const {
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles,
+                                   unsigned ExtraTCycles,
+                                   MachineBasicBlock &FMBB, unsigned NumFCycles,
+                                   unsigned ExtraFCycles,
+                                   BranchProbability Probability) const {
     return false;
   }
 
@@ -695,9 +686,9 @@ public:
   /// The probability of the instructions being executed is given by
   /// Probability, and Confidence is a measure of our confidence that it
   /// will be properly predicted.
-  virtual bool
-  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
-                            BranchProbability Probability) const {
+  virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+                                         unsigned NumCycles,
+                                         BranchProbability Probability) const {
     return false;
   }
 
@@ -735,9 +726,8 @@ public:
   /// @param TrueCycles  Latency from TrueReg to select output.
   /// @param FalseCycles Latency from FalseReg to select output.
   virtual bool canInsertSelect(const MachineBasicBlock &MBB,
-                               ArrayRef<MachineOperand> Cond,
-                               unsigned TrueReg, unsigned FalseReg,
-                               int &CondCycles,
+                               ArrayRef<MachineOperand> Cond, unsigned TrueReg,
+                               unsigned FalseReg, int &CondCycles,
                                int &TrueCycles, int &FalseCycles) const {
     return false;
   }
@@ -953,8 +943,7 @@ public:
   /// Set special operand attributes for new instructions after reassociation.
   virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
                                      MachineInstr &NewMI1,
-                                     MachineInstr &NewMI2) const {
-  }
+                                     MachineInstr &NewMI2) const {}
 
   /// Return true when a target supports MachineCombiner.
   virtual bool useMachineCombiner() const { return false; }
@@ -1007,9 +996,9 @@ protected:
   /// \pre MI.isExtractSubregLike().
   ///
   /// \see TargetInstrInfo::getExtractSubregInputs.
-  virtual bool getExtractSubregLikeInputs(
-      const MachineInstr &MI, unsigned DefIdx,
-      RegSubRegPairAndIdx &InputReg) const {
+  virtual bool getExtractSubregLikeInputs(const MachineInstr &MI,
+                                          unsigned DefIdx,
+                                          RegSubRegPairAndIdx &InputReg) const {
     return false;
   }
 
@@ -1040,7 +1029,7 @@ public:
   }
 
   virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-                                   SmallVectorImpl<SDNode*> &NewNodes) const {
+                                   SmallVectorImpl<SDNode *> &NewNodes) const {
     return false;
   }
 
@@ -1050,9 +1039,9 @@ public:
   /// possible. If LoadRegIndex is non-null, it is filled in with the operand
   /// index of the operand which will hold the register holding the loaded
   /// value.
-  virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
-                                      bool UnfoldLoad, bool UnfoldStore,
-                                      unsigned *LoadRegIndex = nullptr) const {
+  virtual unsigned
+  getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore,
+                             unsigned *LoadRegIndex = nullptr) const {
     return 0;
   }
 
@@ -1061,7 +1050,8 @@ public:
   /// pointers are the same and the only differences between the two addresses
   /// are the offset. It also returns the offsets by reference.
   virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
-                                    int64_t &Offset1, int64_t &Offset2) const {
+                                       int64_t &Offset1,
+                                       int64_t &Offset2) const {
     return false;
   }
 
@@ -1115,8 +1105,8 @@ public:
 
   /// Reverses the branch condition of the specified condition list,
   /// returning false on success and true if it cannot be reversed.
-  virtual
-  bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  virtual bool
+  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
     return true;
   }
 
@@ -1128,14 +1118,10 @@ public:
   virtual void getNoop(MCInst &NopInst) const;
 
   /// Return true for post-incremented instructions.
-  virtual bool isPostIncrement(const MachineInstr &MI) const {
-    return false;
-  }
+  virtual bool isPostIncrement(const MachineInstr &MI) const { return false; }
 
   /// Returns true if the instruction is already predicated.
-  virtual bool isPredicated(const MachineInstr &MI) const {
-    return false;
-  }
+  virtual bool isPredicated(const MachineInstr &MI) const { return false; }
 
   /// Returns true if the instruction is a
   /// terminator instruction that has not been predicated.
@@ -1147,9 +1133,8 @@ public:
   }
 
   /// Returns true if the tail call can be made conditional on BranchCond.
-  virtual bool
-  canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
-                             const MachineInstr &TailCall) const {
+  virtual bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+                                          const MachineInstr &TailCall) const {
     return false;
   }
 
@@ -1167,9 +1152,8 @@ public:
 
   /// Returns true if the first specified predicate
   /// subsumes the second, e.g. GE subsumes GT.
-  virtual
-  bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
-                         ArrayRef<MachineOperand> Pred2) const {
+  virtual bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+                                 ArrayRef<MachineOperand> Pred2) const {
     return false;
   }
 
@@ -1207,25 +1191,25 @@ public:
 
   /// Allocate and return a hazard recognizer to use for this target when
   /// scheduling the machine instructions before register allocation.
-  virtual ScheduleHazardRecognizer*
+  virtual ScheduleHazardRecognizer *
   CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                const ScheduleDAG *DAG) const;
 
   /// Allocate and return a hazard recognizer to use for this target when
   /// scheduling the machine instructions before register allocation.
-  virtual ScheduleHazardRecognizer*
-  CreateTargetMIHazardRecognizer(const InstrItineraryData*,
+  virtual ScheduleHazardRecognizer *
+  CreateTargetMIHazardRecognizer(const InstrItineraryData *,
                                  const ScheduleDAG *DAG) const;
 
   /// Allocate and return a hazard recognizer to use for this target when
   /// scheduling the machine instructions after register allocation.
-  virtual ScheduleHazardRecognizer*
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+  virtual ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *,
                                      const ScheduleDAG *DAG) const;
 
   /// Allocate and return a hazard recognizer to be used by non-scheduling
   /// passes.
-  virtual ScheduleHazardRecognizer*
+  virtual ScheduleHazardRecognizer *
   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
     return nullptr;
   }
@@ -1502,7 +1486,7 @@ public:
 
   /// \brief Return the value to use for the MachineCSE's LookAheadLimit,
   /// which is a heuristic used for CSE'ing phys reg defs.
-  virtual unsigned getMachineCSELookAheadLimit () const {
+  virtual unsigned getMachineCSELookAheadLimit() const {
     // The default lookahead is small to prevent unprofitable quadratic
     // behavior.
     return 5;
@@ -1569,13 +1553,24 @@ public:
     return false;
   }
 
-  /// \brief Return how many instructions would be saved by outlining a
-  /// sequence containing \p SequenceSize instructions that appears
-  /// \p Occurrences times in a module.
-  virtual unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
-                                       bool CanBeTailCall) const {
+  /// \brief Returns the number of instructions it will take to call a
+  /// function defined by the sequence on the closed interval
+  /// [\p StartIt, \p EndIt].
+  virtual size_t
+  getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
+                           MachineBasicBlock::iterator &EndIt) const {
+    llvm_unreachable(
+        "Target didn't implement TargetInstrInfo::getOutliningCallOverhead!");
+  }
+
+  /// \brief Returns the number of instructions it will take to construct an
+  /// outlined function frame for a function defined on the closed interval
+  /// [\p StartIt, \p EndIt].
+  virtual size_t
+  getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt,
+                            MachineBasicBlock::iterator &EndIt) const {
     llvm_unreachable(
-        "Target didn't implement TargetInstrInfo::getOutliningBenefit!");
+        "Target didn't implement TargetInstrInfo::getOutliningCallOverhead!");
   }
 
   /// Represents how an instruction should be mapped by the outliner.
@@ -1583,7 +1578,7 @@ public:
   /// \p Illegal instructions are those which cannot be outlined.
   /// \p Invisible instructions are instructions which can be outlined, but
   /// shouldn't actually impact the outlining result.
-  enum MachineOutlinerInstrType {Legal, Illegal, Invisible};
+  enum MachineOutlinerInstrType { Legal, Illegal, Invisible };
 
   /// Returns how or if \p MI should be outlined.
   virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const {
@@ -1635,25 +1630,23 @@ private:
 };
 
 /// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair.
-template<>
-struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> {
+template <> struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> {
   using RegInfo = DenseMapInfo<unsigned>;
 
   static inline TargetInstrInfo::RegSubRegPair getEmptyKey() {
     return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(),
-                         RegInfo::getEmptyKey());
+                                          RegInfo::getEmptyKey());
   }
 
   static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() {
     return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(),
-                         RegInfo::getTombstoneKey());
+                                          RegInfo::getTombstoneKey());
   }
 
   /// \brief Reuse getHashValue implementation from
   /// std::pair<unsigned, unsigned>.
   static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) {
-    std::pair<unsigned, unsigned> PairVal =
-        std::make_pair(Val.Reg, Val.SubReg);
+    std::pair<unsigned, unsigned> PairVal = std::make_pair(Val.Reg, Val.SubReg);
     return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
   }
 

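To give a concrete picture of the new hooks, here is an illustrative
override for a hypothetical target (MyTargetInstrInfo is made up for
this sketch; this is not the AArch64 or X86 implementation from this
patch). It assumes a call to an outlined function costs one
instruction and the outlined frame adds one return instruction:

size_t MyTargetInstrInfo::getOutliningCallOverhead(
    MachineBasicBlock::iterator &StartIt,
    MachineBasicBlock::iterator &EndIt) const {
  // One call instruction per call site on this hypothetical target.
  // A real target can inspect [StartIt, EndIt], e.g. to charge less
  // when the sequence ends in a terminator and can be tail called.
  return 1;
}

size_t MyTargetInstrInfo::getOutliningFrameOverhead(
    MachineBasicBlock::iterator &StartIt,
    MachineBasicBlock::iterator &EndIt) const {
  // One return instruction to terminate the outlined function.
  return 1;
}
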
Modified: llvm/trunk/lib/CodeGen/MachineOutliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineOutliner.cpp?rev=309356&r1=309355&r2=309356&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineOutliner.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineOutliner.cpp Thu Jul 27 20:21:58 2017
@@ -114,7 +114,7 @@ struct OutlinedFunction {
   /// This is initialized after we go through and create the actual function.
   MachineFunction *MF = nullptr;
 
-  /// A number assigned to this function which appears at the end of its name.
+  /// A number assigned to this function which appears at the end of its name.
   size_t Name;
 
   /// The number of candidates for this OutlinedFunction.
@@ -813,11 +813,13 @@ struct MachineOutliner : public ModulePa
   ///
   /// \param[in,out] CandidateList A list of outlining candidates.
   /// \param[in,out] FunctionList A list of functions to be outlined.
+  /// \param Mapper Contains instruction mapping info for outlining.
   /// \param MaxCandidateLen The length of the longest candidate.
   /// \param TII TargetInstrInfo for the module.
   void pruneOverlaps(std::vector<Candidate> &CandidateList,
                      std::vector<OutlinedFunction> &FunctionList,
-                     unsigned MaxCandidateLen, const TargetInstrInfo &TII);
+                     InstructionMapper &Mapper, unsigned MaxCandidateLen,
+                     const TargetInstrInfo &TII);
 
   /// Construct a suffix tree on the instructions in \p M and outline repeated
   /// strings from that tree.
@@ -859,23 +861,40 @@ MachineOutliner::findCandidates(SuffixTr
     if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
       continue;
 
-    // How many instructions would outlining this string save?
+    // Figure out if this candidate is beneficial.
     size_t StringLen = Leaf->ConcatLen - Leaf->size();
-    unsigned EndVal = ST.Str[Leaf->SuffixIdx + StringLen - 1];
+    size_t CallOverhead = 0;
+    size_t FrameOverhead = 0;
+    size_t SequenceOverhead = StringLen;
 
-    // Determine if this is going to be tail called.
-    // FIXME: The target should decide this. The outlining pass shouldn't care
-    // about things like tail calling. It should be representation-agnostic.
-    MachineInstr *LastInstr = Mapper.IntegerInstructionMap[EndVal];
-    assert(LastInstr && "Last instruction in sequence was unmapped!");
-    bool IsTailCall = LastInstr->isTerminator();
-    unsigned Benefit =
-        TII.getOutliningBenefit(StringLen, Parent.OccurrenceCount, IsTailCall);
+    // Figure out the call overhead for each instance of the sequence.
+    for (auto &ChildPair : Parent.Children) {
+      SuffixTreeNode *M = ChildPair.second;
+
+      if (M && M->IsInTree && M->isLeaf()) {
+        // Each sequence is over [StartIt, EndIt].
+        MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx];
+        MachineBasicBlock::iterator EndIt =
+            Mapper.InstrList[M->SuffixIdx + StringLen - 1];
+        CallOverhead += TII.getOutliningCallOverhead(StartIt, EndIt);
+      }
+    }
 
-    // If it's not beneficial, skip it.
-    if (Benefit < 1)
+    // Figure out how many instructions it'll take to construct an outlined
+    // function frame for this sequence.
+    MachineBasicBlock::iterator StartIt = Mapper.InstrList[Leaf->SuffixIdx];
+    MachineBasicBlock::iterator EndIt =
+        Mapper.InstrList[Leaf->SuffixIdx + StringLen - 1];
+    FrameOverhead = TII.getOutliningFrameOverhead(StartIt, EndIt);
+
+    size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead;
+    size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount;
+
+    if (NotOutliningCost <= OutliningCost)
       continue;
 
+    size_t Benefit = NotOutliningCost - OutliningCost;
+
     if (StringLen > MaxLen)
       MaxLen = StringLen;
 
@@ -910,6 +929,7 @@ MachineOutliner::findCandidates(SuffixTr
 
 void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
                                     std::vector<OutlinedFunction> &FunctionList,
+                                    InstructionMapper &Mapper,
                                     unsigned MaxCandidateLen,
                                     const TargetInstrInfo &TII) {
   // TODO: Experiment with interval trees or other interval-checking structures
@@ -993,8 +1013,18 @@ void MachineOutliner::pruneOverlaps(std:
         assert(F2.OccurrenceCount > 0 &&
                "Can't remove OutlinedFunction with no occurrences!");
         F2.OccurrenceCount--;
-        F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(),
-                                             F2.OccurrenceCount, F2.IsTailCall);
+
+        // Remove the call overhead from the removed sequence.
+        MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx];
+        MachineBasicBlock::iterator EndIt =
+            Mapper.InstrList[C2.StartIdx + C2.Len - 1];
+        F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt);
+
+        // Add back one instance of the sequence.
+        if (F2.Sequence.size() > F2.Benefit)
+          F2.Benefit = 0;
+        else
+          F2.Benefit -= F2.Sequence.size();
 
         C2.InCandidateList = false;
 
@@ -1009,8 +1039,19 @@ void MachineOutliner::pruneOverlaps(std:
         assert(F1.OccurrenceCount > 0 &&
                "Can't remove OutlinedFunction with no occurrences!");
         F1.OccurrenceCount--;
-        F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(),
-                                             F1.OccurrenceCount, F1.IsTailCall);
+
+        // Remove the call overhead from the removed sequence.
+        MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx];
+        MachineBasicBlock::iterator EndIt =
+            Mapper.InstrList[C1.StartIdx + C1.Len - 1];
+        F1.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt);
+
+        // Add back one instance of the sequence.
+        if (F1.Sequence.size() > F1.Benefit)
+          F1.Benefit = 0;
+        else
+          F1.Benefit -= F1.Sequence.size();
+
         C1.InCandidateList = false;
 
         DEBUG(dbgs() << "- Removed C1. \n";
@@ -1206,7 +1247,7 @@ bool MachineOutliner::runOnModule(Module
       buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
 
   // Remove candidates that overlap with other candidates.
-  pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII);
+  pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
 
   // Outline each of the candidates and return true if something was outlined.
   return outline(M, CandidateList, FunctionList, Mapper);

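The benefit update applied to F1/F2 in pruneOverlaps when a candidate
is dropped can be summarized by the helper below. This is a standalone
sketch with a hypothetical name (benefitAfterRemovingCandidate); the
real code reads the site's call overhead back through TII and the
instruction mapper.

#include <cstddef>

// When a candidate is pruned, its call is no longer emitted, so its
// call overhead no longer counts against the outlining cost.
static size_t benefitAfterRemovingCandidate(size_t Benefit,
                                            size_t SequenceSize,
                                            size_t SiteCallOverhead) {
  Benefit += SiteCallOverhead;
  // But one fewer occurrence is replaced by a call, so one full copy
  // of the sequence no longer counts toward the savings. Clamp at
  // zero to keep the benefit unsigned.
  if (SequenceSize > Benefit)
    return 0;
  return Benefit - SequenceSize;
}
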
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=309356&r1=309355&r2=309356&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Jul 27 20:21:58 2017
@@ -52,17 +52,17 @@ using namespace llvm;
 #define GET_INSTRINFO_CTOR_DTOR
 #include "AArch64GenInstrInfo.inc"
 
-static cl::opt<unsigned>
-TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
-                    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
-
-static cl::opt<unsigned>
-CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
-                    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
+static cl::opt<unsigned> TBZDisplacementBits(
+    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
+    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
+
+static cl::opt<unsigned> CBZDisplacementBits(
+    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
+    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
 
 static cl::opt<unsigned>
-BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
-                    cl::desc("Restrict range of Bcc instructions (DEBUG)"));
+    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
+                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));
 
 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
@@ -172,8 +172,8 @@ bool AArch64InstrInfo::isBranchOffsetInR
   return isIntN(Bits, BrOffset / 4);
 }
 
-MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
-  const MachineInstr &MI) const {
+MachineBasicBlock *
+AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   default:
     llvm_unreachable("unexpected opcode!");
@@ -374,12 +374,9 @@ void AArch64InstrInfo::instantiateCondBr
   }
 }
 
-unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
-                                        MachineBasicBlock *TBB,
-                                        MachineBasicBlock *FBB,
-                                        ArrayRef<MachineOperand> Cond,
-                                        const DebugLoc &DL,
-                                        int *BytesAdded) const {
+unsigned AArch64InstrInfo::insertBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
 
@@ -485,10 +482,11 @@ static unsigned canFoldIntoCSel(const Ma
   return Opc;
 }
 
-bool AArch64InstrInfo::canInsertSelect(
-    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
-    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
-    int &FalseCycles) const {
+bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+                                       ArrayRef<MachineOperand> Cond,
+                                       unsigned TrueReg, unsigned FalseReg,
+                                       int &CondCycles, int &TrueCycles,
+                                       int &FalseCycles) const {
   // Check register classes.
   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   const TargetRegisterClass *RC =
@@ -656,8 +654,10 @@ void AArch64InstrInfo::insertSelect(Mach
   MRI.constrainRegClass(FalseReg, RC);
 
   // Insert the csel.
-  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
-      CC);
+  BuildMI(MBB, I, DL, get(Opc), DstReg)
+      .addReg(TrueReg)
+      .addReg(FalseReg)
+      .addImm(CC);
 }
 
 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
@@ -1078,11 +1078,7 @@ static unsigned convertToNonFlagSettingO
   }
 }
 
-enum AccessKind {
-  AK_Write = 0x01,
-  AK_Read  = 0x10,
-  AK_All   = 0x11
-};
+enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
 
 /// True when condition flags are accessed (either by writing or reading)
 /// on the instruction trace starting at From and ending at To.
@@ -1111,21 +1107,24 @@ static bool areCFlagsAccessedBetweenInst
   for (--To; To != From; --To) {
     const MachineInstr &Instr = *To;
 
-    if ( ((AccessToCheck & AK_Write) && Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
-         ((AccessToCheck & AK_Read)  && Instr.readsRegister(AArch64::NZCV, TRI)))
+    if (((AccessToCheck & AK_Write) &&
+         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
+        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
       return true;
   }
   return false;
 }
 
 /// Try to optimize a compare instruction. A compare instruction is an
-/// instruction which produces AArch64::NZCV. It can be truly compare instruction
+/// instruction which produces AArch64::NZCV. It can be a true compare
+/// instruction
 /// when there are no uses of its destination register.
 ///
 /// The following steps are tried in order:
 /// 1. Convert CmpInstr into an unconditional version.
 /// 2. Remove CmpInstr if above there is an instruction producing a needed
-///    condition code or an instruction which can be converted into such an instruction.
+///    condition code or an instruction which can be converted into such an
+///    instruction.
 ///    Only comparison with zero is supported.
 bool AArch64InstrInfo::optimizeCompareInstr(
     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
@@ -1187,20 +1186,34 @@ static unsigned sForm(MachineInstr &Inst
   case AArch64::SUBSXri:
     return Instr.getOpcode();
 
-  case AArch64::ADDWrr:    return AArch64::ADDSWrr;
-  case AArch64::ADDWri:    return AArch64::ADDSWri;
-  case AArch64::ADDXrr:    return AArch64::ADDSXrr;
-  case AArch64::ADDXri:    return AArch64::ADDSXri;
-  case AArch64::ADCWr:     return AArch64::ADCSWr;
-  case AArch64::ADCXr:     return AArch64::ADCSXr;
-  case AArch64::SUBWrr:    return AArch64::SUBSWrr;
-  case AArch64::SUBWri:    return AArch64::SUBSWri;
-  case AArch64::SUBXrr:    return AArch64::SUBSXrr;
-  case AArch64::SUBXri:    return AArch64::SUBSXri;
-  case AArch64::SBCWr:     return AArch64::SBCSWr;
-  case AArch64::SBCXr:     return AArch64::SBCSXr;
-  case AArch64::ANDWri:    return AArch64::ANDSWri;
-  case AArch64::ANDXri:    return AArch64::ANDSXri;
+  case AArch64::ADDWrr:
+    return AArch64::ADDSWrr;
+  case AArch64::ADDWri:
+    return AArch64::ADDSWri;
+  case AArch64::ADDXrr:
+    return AArch64::ADDSXrr;
+  case AArch64::ADDXri:
+    return AArch64::ADDSXri;
+  case AArch64::ADCWr:
+    return AArch64::ADCSWr;
+  case AArch64::ADCXr:
+    return AArch64::ADCSXr;
+  case AArch64::SUBWrr:
+    return AArch64::SUBSWrr;
+  case AArch64::SUBWri:
+    return AArch64::SUBSWri;
+  case AArch64::SUBXrr:
+    return AArch64::SUBSXrr;
+  case AArch64::SUBXri:
+    return AArch64::SUBSXri;
+  case AArch64::SBCWr:
+    return AArch64::SBCSWr;
+  case AArch64::SBCXr:
+    return AArch64::SBCSXr;
+  case AArch64::ANDWri:
+    return AArch64::ANDSWri;
+  case AArch64::ANDXri:
+    return AArch64::ANDSXri;
   }
 }
 
@@ -1222,7 +1235,7 @@ struct UsedNZCV {
 
   UsedNZCV() = default;
 
-  UsedNZCV& operator |=(const UsedNZCV& UsedFlags) {
+  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
     this->N |= UsedFlags.N;
     this->Z |= UsedFlags.Z;
     this->C |= UsedFlags.C;
@@ -1238,29 +1251,29 @@ struct UsedNZCV {
 /// codes or we don't optimize CmpInstr in the presence of such instructions.
 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
   switch (Instr.getOpcode()) {
-    default:
-      return AArch64CC::Invalid;
+  default:
+    return AArch64CC::Invalid;
 
-    case AArch64::Bcc: {
-      int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
-      assert(Idx >= 2);
-      return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
-    }
-
-    case AArch64::CSINVWr:
-    case AArch64::CSINVXr:
-    case AArch64::CSINCWr:
-    case AArch64::CSINCXr:
-    case AArch64::CSELWr:
-    case AArch64::CSELXr:
-    case AArch64::CSNEGWr:
-    case AArch64::CSNEGXr:
-    case AArch64::FCSELSrrr:
-    case AArch64::FCSELDrrr: {
-      int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
-      assert(Idx >= 1);
-      return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
-    }
+  case AArch64::Bcc: {
+    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+    assert(Idx >= 2);
+    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
+  }
+
+  case AArch64::CSINVWr:
+  case AArch64::CSINVXr:
+  case AArch64::CSINCWr:
+  case AArch64::CSINCXr:
+  case AArch64::CSELWr:
+  case AArch64::CSELXr:
+  case AArch64::CSNEGWr:
+  case AArch64::CSNEGXr:
+  case AArch64::FCSELSrrr:
+  case AArch64::FCSELDrrr: {
+    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+    assert(Idx >= 1);
+    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
+  }
   }
 }
 
@@ -1268,42 +1281,42 @@ static UsedNZCV getUsedNZCV(AArch64CC::C
   assert(CC != AArch64CC::Invalid);
   UsedNZCV UsedFlags;
   switch (CC) {
-    default:
-      break;
+  default:
+    break;
 
-    case AArch64CC::EQ: // Z set
-    case AArch64CC::NE: // Z clear
-      UsedFlags.Z = true;
-      break;
+  case AArch64CC::EQ: // Z set
+  case AArch64CC::NE: // Z clear
+    UsedFlags.Z = true;
+    break;
 
-    case AArch64CC::HI: // Z clear and C set
-    case AArch64CC::LS: // Z set   or  C clear
-      UsedFlags.Z = true;
-      LLVM_FALLTHROUGH;
-    case AArch64CC::HS: // C set
-    case AArch64CC::LO: // C clear
-      UsedFlags.C = true;
-      break;
+  case AArch64CC::HI: // Z clear and C set
+  case AArch64CC::LS: // Z set   or  C clear
+    UsedFlags.Z = true;
+    LLVM_FALLTHROUGH;
+  case AArch64CC::HS: // C set
+  case AArch64CC::LO: // C clear
+    UsedFlags.C = true;
+    break;
 
-    case AArch64CC::MI: // N set
-    case AArch64CC::PL: // N clear
-      UsedFlags.N = true;
-      break;
+  case AArch64CC::MI: // N set
+  case AArch64CC::PL: // N clear
+    UsedFlags.N = true;
+    break;
 
-    case AArch64CC::VS: // V set
-    case AArch64CC::VC: // V clear
-      UsedFlags.V = true;
-      break;
+  case AArch64CC::VS: // V set
+  case AArch64CC::VC: // V clear
+    UsedFlags.V = true;
+    break;
 
-    case AArch64CC::GT: // Z clear, N and V the same
-    case AArch64CC::LE: // Z set,   N and V differ
-      UsedFlags.Z = true;
-      LLVM_FALLTHROUGH;
-    case AArch64CC::GE: // N and V the same
-    case AArch64CC::LT: // N and V differ 
-      UsedFlags.N = true;
-      UsedFlags.V = true;
-      break;
+  case AArch64CC::GT: // Z clear, N and V the same
+  case AArch64CC::LE: // Z set,   N and V differ
+    UsedFlags.Z = true;
+    LLVM_FALLTHROUGH;
+  case AArch64CC::GE: // N and V the same
+  case AArch64CC::LT: // N and V differ
+    UsedFlags.N = true;
+    UsedFlags.V = true;
+    break;
   }
   return UsedFlags;
 }
@@ -1328,7 +1341,7 @@ static bool isSUBSRegImm(unsigned Opcode
 ///        nor uses of flags between MI and CmpInstr.
 /// - and  C/V flags are not used after CmpInstr
 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
-    const TargetRegisterInfo *TRI) {
+                                       const TargetRegisterInfo *TRI) {
   assert(MI);
   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
   assert(CmpInstr);
@@ -1350,7 +1363,8 @@ static bool canInstrSubstituteCmpInstr(M
     return false;
 
   UsedNZCV NZCVUsedAfterCmp;
-  for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end();
+  for (auto I = std::next(CmpInstr->getIterator()),
+            E = CmpInstr->getParent()->instr_end();
        I != E; ++I) {
     const MachineInstr &Instr = *I;
     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
@@ -1363,7 +1377,7 @@ static bool canInstrSubstituteCmpInstr(M
     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
       break;
   }
-  
+
   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
 }
 
@@ -1421,16 +1435,20 @@ bool AArch64InstrInfo::expandPostRAPseud
         .addMemOperand(*MI.memoperands_begin());
   } else if (TM.getCodeModel() == CodeModel::Large) {
     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
+        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
+        .addImm(0);
     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
         .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
+        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
+        .addImm(16);
     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
         .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
+        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
+        .addImm(32);
     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
         .addReg(Reg, RegState::Kill)
-        .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
+        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
+        .addImm(48);
     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
         .addReg(Reg, RegState::Kill)
         .addImm(0)
@@ -1812,7 +1830,7 @@ bool AArch64InstrInfo::getMemOpBaseRegIm
   } else
     return false;
 
-  // Get the scaling factor for the instruction and set the width for the 
+  // Get the scaling factor for the instruction and set the width for the
   // instruction.
   unsigned Scale = 0;
   int64_t Dummy1, Dummy2;
@@ -1835,10 +1853,10 @@ bool AArch64InstrInfo::getMemOpBaseRegIm
   return true;
 }
 
-MachineOperand&
+MachineOperand &
 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
-  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1);
+  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
   return OfsOp;
 }
@@ -1847,7 +1865,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsi
                                     unsigned &Width, int64_t &MinOffset,
                                     int64_t &MaxOffset) const {
   switch (Opcode) {
-  // Not a memory operation or something we want to handle.  
+  // Not a memory operation or something we want to handle.
   default:
     Scale = Width = 0;
     MinOffset = MaxOffset = 0;
@@ -2102,12 +2120,13 @@ static bool forwardCopyWillClobberTuple(
   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
 }
 
-void AArch64InstrInfo::copyPhysRegTuple(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
-    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
-    ArrayRef<unsigned> Indices) const {
-  assert(Subtarget.hasNEON() &&
-         "Unexpected register copy without NEON");
+void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I,
+                                        const DebugLoc &DL, unsigned DestReg,
+                                        unsigned SrcReg, bool KillSrc,
+                                        unsigned Opcode,
+                                        ArrayRef<unsigned> Indices) const {
+  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
@@ -2160,8 +2179,9 @@ void AArch64InstrInfo::copyPhysReg(Machi
             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
       }
     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
-      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
-          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
+          .addImm(0)
+          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
     } else {
       if (Subtarget.hasZeroCycleRegMove()) {
         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
@@ -2196,8 +2216,9 @@ void AArch64InstrInfo::copyPhysReg(Machi
           .addImm(0)
           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
-      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
-          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
+          .addImm(0)
+          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
     } else {
       // Otherwise, expand to ORR XZR.
       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
@@ -2210,8 +2231,8 @@ void AArch64InstrInfo::copyPhysReg(Machi
   // Copy a DDDD register quad by copying the individual sub-registers.
   if (AArch64::DDDDRegClass.contains(DestReg) &&
       AArch64::DDDDRegClass.contains(SrcReg)) {
-    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
-                                        AArch64::dsub2, AArch64::dsub3 };
+    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
+                                       AArch64::dsub2, AArch64::dsub3};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                      Indices);
     return;
@@ -2220,8 +2241,8 @@ void AArch64InstrInfo::copyPhysReg(Machi
   // Copy a DDD register triple by copying the individual sub-registers.
   if (AArch64::DDDRegClass.contains(DestReg) &&
       AArch64::DDDRegClass.contains(SrcReg)) {
-    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
-                                        AArch64::dsub2 };
+    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
+                                       AArch64::dsub2};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                      Indices);
     return;
@@ -2230,7 +2251,7 @@ void AArch64InstrInfo::copyPhysReg(Machi
   // Copy a DD register pair by copying the individual sub-registers.
   if (AArch64::DDRegClass.contains(DestReg) &&
       AArch64::DDRegClass.contains(SrcReg)) {
-    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
+    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                      Indices);
     return;
@@ -2239,8 +2260,8 @@ void AArch64InstrInfo::copyPhysReg(Machi
   // Copy a QQQQ register quad by copying the individual sub-registers.
   if (AArch64::QQQQRegClass.contains(DestReg) &&
       AArch64::QQQQRegClass.contains(SrcReg)) {
-    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
-                                        AArch64::qsub2, AArch64::qsub3 };
+    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
+                                       AArch64::qsub2, AArch64::qsub3};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                      Indices);
     return;
@@ -2249,8 +2270,8 @@ void AArch64InstrInfo::copyPhysReg(Machi
   // Copy a QQQ register triple by copying the individual sub-registers.
   if (AArch64::QQQRegClass.contains(DestReg) &&
       AArch64::QQQRegClass.contains(SrcReg)) {
-    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
-                                        AArch64::qsub2 };
+    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
+                                       AArch64::qsub2};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                      Indices);
     return;
@@ -2259,7 +2280,7 @@ void AArch64InstrInfo::copyPhysReg(Machi
   // Copy a QQ register pair by copying the individual sub-registers.
   if (AArch64::QQRegClass.contains(DestReg) &&
       AArch64::QQRegClass.contains(SrcReg)) {
-    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
+    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                      Indices);
     return;
@@ -2267,28 +2288,28 @@ void AArch64InstrInfo::copyPhysReg(Machi
 
   if (AArch64::FPR128RegClass.contains(DestReg) &&
       AArch64::FPR128RegClass.contains(SrcReg)) {
-    if(Subtarget.hasNEON()) {
+    if (Subtarget.hasNEON()) {
       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
           .addReg(SrcReg)
           .addReg(SrcReg, getKillRegState(KillSrc));
     } else {
       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
-        .addReg(AArch64::SP, RegState::Define)
-        .addReg(SrcReg, getKillRegState(KillSrc))
-        .addReg(AArch64::SP)
-        .addImm(-16);
+          .addReg(AArch64::SP, RegState::Define)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .addReg(AArch64::SP)
+          .addImm(-16);
       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
-        .addReg(AArch64::SP, RegState::Define)
-        .addReg(DestReg, RegState::Define)
-        .addReg(AArch64::SP)
-        .addImm(16);
+          .addReg(AArch64::SP, RegState::Define)
+          .addReg(DestReg, RegState::Define)
+          .addReg(AArch64::SP)
+          .addImm(16);
     }
     return;
   }
 
   if (AArch64::FPR64RegClass.contains(DestReg) &&
       AArch64::FPR64RegClass.contains(SrcReg)) {
-    if(Subtarget.hasNEON()) {
+    if (Subtarget.hasNEON()) {
       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                        &AArch64::FPR128RegClass);
       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
@@ -2305,7 +2326,7 @@ void AArch64InstrInfo::copyPhysReg(Machi
 
   if (AArch64::FPR32RegClass.contains(DestReg) &&
       AArch64::FPR32RegClass.contains(SrcReg)) {
-    if(Subtarget.hasNEON()) {
+    if (Subtarget.hasNEON()) {
       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                        &AArch64::FPR128RegClass);
       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
@@ -2322,7 +2343,7 @@ void AArch64InstrInfo::copyPhysReg(Machi
 
   if (AArch64::FPR16RegClass.contains(DestReg) &&
       AArch64::FPR16RegClass.contains(SrcReg)) {
-    if(Subtarget.hasNEON()) {
+    if (Subtarget.hasNEON()) {
       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                        &AArch64::FPR128RegClass);
       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
@@ -2343,7 +2364,7 @@ void AArch64InstrInfo::copyPhysReg(Machi
 
   if (AArch64::FPR8RegClass.contains(DestReg) &&
       AArch64::FPR8RegClass.contains(SrcReg)) {
-    if(Subtarget.hasNEON()) {
+    if (Subtarget.hasNEON()) {
       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                        &AArch64::FPR128RegClass);
       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
@@ -2392,17 +2413,17 @@ void AArch64InstrInfo::copyPhysReg(Machi
   if (DestReg == AArch64::NZCV) {
     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
     BuildMI(MBB, I, DL, get(AArch64::MSR))
-      .addImm(AArch64SysReg::NZCV)
-      .addReg(SrcReg, getKillRegState(KillSrc))
-      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
+        .addImm(AArch64SysReg::NZCV)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
     return;
   }
 
   if (SrcReg == AArch64::NZCV) {
     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
-      .addImm(AArch64SysReg::NZCV)
-      .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
+        .addImm(AArch64SysReg::NZCV)
+        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
     return;
   }
 
@@ -2458,45 +2479,39 @@ void AArch64InstrInfo::storeRegToStackSl
     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
       Opc = AArch64::STRQui;
     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register store without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Twov1d;
       Offset = false;
     }
     break;
   case 24:
     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register store without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Threev1d;
       Offset = false;
     }
     break;
   case 32:
     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register store without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Fourv1d;
       Offset = false;
     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register store without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Twov2d;
       Offset = false;
     }
     break;
   case 48:
     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register store without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Threev2d;
       Offset = false;
     }
     break;
   case 64:
     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register store without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Fourv2d;
       Offset = false;
     }
@@ -2505,8 +2520,8 @@ void AArch64InstrInfo::storeRegToStackSl
   assert(Opc && "Unknown register class");
 
   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
-                                      .addReg(SrcReg, getKillRegState(isKill))
-                                      .addFrameIndex(FI);
+                                     .addReg(SrcReg, getKillRegState(isKill))
+                                     .addFrameIndex(FI);
 
   if (Offset)
     MI.addImm(0);
@@ -2562,45 +2577,39 @@ void AArch64InstrInfo::loadRegFromStackS
     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
       Opc = AArch64::LDRQui;
     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register load without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Twov1d;
       Offset = false;
     }
     break;
   case 24:
     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register load without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Threev1d;
       Offset = false;
     }
     break;
   case 32:
     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register load without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Fourv1d;
       Offset = false;
     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register load without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Twov2d;
       Offset = false;
     }
     break;
   case 48:
     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register load without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Threev2d;
       Offset = false;
     }
     break;
   case 64:
     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasNEON() &&
-             "Unexpected register load without NEON");
+      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Fourv2d;
       Offset = false;
     }
@@ -2609,8 +2618,8 @@ void AArch64InstrInfo::loadRegFromStackS
   assert(Opc && "Unknown register class");
 
   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
-                                      .addReg(DestReg, getDefRegState(true))
-                                      .addFrameIndex(FI);
+                                     .addReg(DestReg, getDefRegState(true))
+                                     .addFrameIndex(FI);
   if (Offset)
     MI.addImm(0);
   MI.addMemOperand(MMO);
@@ -2755,7 +2764,7 @@ MachineInstr *AArch64InstrInfo::foldMemo
 
     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
-             TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
+                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
              "Mismatched register size in non subreg COPY");
       if (IsSpill)
         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
@@ -3138,10 +3147,7 @@ void AArch64InstrInfo::getNoop(MCInst &N
 }
 
 // AArch64 supports MachineCombiner.
-bool AArch64InstrInfo::useMachineCombiner() const {
-
-  return true;
-}
+bool AArch64InstrInfo::useMachineCombiner() const { return true; }
 
 // True when Opc sets flag
 static bool isCombineInstrSettingFlag(unsigned Opc) {
@@ -3275,7 +3281,8 @@ static bool canCombineWithFMUL(MachineBa
 //       1. Other data types (integer, vectors)
 //       2. Other math / logic operations (xor, or)
 //       3. Other forms of the same operation (intrinsics and other variants)
-bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+bool AArch64InstrInfo::isAssociativeAndCommutative(
+    const MachineInstr &Inst) const {
   switch (Inst.getOpcode()) {
   case AArch64::FADDDrr:
   case AArch64::FADDSrr:
@@ -3595,8 +3602,8 @@ static bool getFMAPatterns(MachineInstr
 /// Return true when a code sequence can improve throughput. It
 /// should be called only for instructions in loops.
 /// \param Pattern - combiner pattern
-bool
-AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+bool AArch64InstrInfo::isThroughputPattern(
+    MachineCombinerPattern Pattern) const {
   switch (Pattern) {
   default:
     break;
@@ -3747,8 +3754,8 @@ genFusedMultiply(MachineFunction &MF, Ma
 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                               const TargetInstrInfo *TII, MachineInstr &Root,
                               SmallVectorImpl<MachineInstr *> &InsInstrs,
-                              unsigned IdxMulOpd, unsigned MaddOpc,
-                              unsigned VR, const TargetRegisterClass *RC) {
+                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
+                              const TargetRegisterClass *RC) {
   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
 
   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
@@ -3767,11 +3774,11 @@ static MachineInstr *genMaddR(MachineFun
   if (TargetRegisterInfo::isVirtualRegister(VR))
     MRI.constrainRegClass(VR, RC);
 
-  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
-                                    ResultReg)
-                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
-                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
-                                .addReg(VR);
+  MachineInstrBuilder MIB =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
+          .addReg(SrcReg0, getKillRegState(Src0IsKill))
+          .addReg(SrcReg1, getKillRegState(Src1IsKill))
+          .addReg(VR);
   // Insert the MADD
   InsInstrs.push_back(MIB);
   return MUL;
@@ -4401,12 +4408,9 @@ AArch64InstrInfo::getSerializableDirectM
   using namespace AArch64II;
 
   static const std::pair<unsigned, const char *> TargetFlags[] = {
-      {MO_PAGE, "aarch64-page"},
-      {MO_PAGEOFF, "aarch64-pageoff"},
-      {MO_G3, "aarch64-g3"},
-      {MO_G2, "aarch64-g2"},
-      {MO_G1, "aarch64-g1"},
-      {MO_G0, "aarch64-g0"},
+      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
+      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
+      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
       {MO_HI12, "aarch64-hi12"}};
   return makeArrayRef(TargetFlags);
 }
@@ -4416,9 +4420,7 @@ AArch64InstrInfo::getSerializableBitmask
   using namespace AArch64II;
 
   static const std::pair<unsigned, const char *> TargetFlags[] = {
-      {MO_GOT, "aarch64-got"},
-      {MO_NC, "aarch64-nc"},
-      {MO_TLS, "aarch64-tls"}};
+      {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
   return makeArrayRef(TargetFlags);
 }
 
@@ -4430,26 +4432,27 @@ AArch64InstrInfo::getSerializableMachine
   return makeArrayRef(TargetFlags);
 }
 
-unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
-                                               size_t Occurrences,
-                                               bool CanBeTailCall) const {
-  unsigned NotOutlinedSize = SequenceSize * Occurrences;
-  unsigned OutlinedSize;
-
-  // Is this candidate something we can outline as a tail call?
-  if (CanBeTailCall) {
-    // If yes, then we just outline the sequence and replace each of its
-    // occurrences with a branch instruction.
-    OutlinedSize = SequenceSize + Occurrences;
-  } else {
-    // If no, then we outline the sequence (SequenceSize), add a return (+1),
-    // and replace each occurrence with a save/restore to LR and a call
-    // (3 * Occurrences)
-    OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
-  }
+size_t AArch64InstrInfo::getOutliningCallOverhead(
+    MachineBasicBlock::iterator &StartIt,
+    MachineBasicBlock::iterator &EndIt) const {
+  // Is this a tail-call?
+  if (EndIt->isTerminator())
+    return 1; // Yes, so we don't need to save/restore LR.
+
+  // No, so save + restore LR.
+  return 3;
+}
+
+size_t AArch64InstrInfo::getOutliningFrameOverhead(
+    MachineBasicBlock::iterator &StartIt,
+    MachineBasicBlock::iterator &EndIt) const {
+
+  // Is this a tail-call?
+  if (EndIt->isTerminator())
+    return 0; // Yes, so we already have a return.
 
-  // Return the number of instructions saved by outlining this sequence.
-  return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+  // No, so we have to add a return to the end.
+  return 1;
 }
 
 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
@@ -4475,7 +4478,7 @@ AArch64InstrInfo::getOutliningType(Machi
 
     // Is this the end of a function?
     if (MI.getParent()->succ_empty())
-        return MachineOutlinerInstrType::Legal;
+      return MachineOutlinerInstrType::Legal;
 
     // It's not, so don't outline it.
     return MachineOutlinerInstrType::Illegal;
@@ -4494,7 +4497,7 @@ AArch64InstrInfo::getOutliningType(Machi
   // Don't outline anything that uses the link register.
   if (MI.modifiesRegister(AArch64::LR, &RI) ||
       MI.readsRegister(AArch64::LR, &RI))
-      return MachineOutlinerInstrType::Illegal;
+    return MachineOutlinerInstrType::Illegal;
 
   // Does this use the stack?
   if (MI.modifiesRegister(AArch64::SP, &RI) ||
@@ -4502,13 +4505,13 @@ AArch64InstrInfo::getOutliningType(Machi
 
     // Is it a memory operation?
     if (MI.mayLoadOrStore()) {
-      unsigned Base; // Filled with the base regiser of MI.
+      unsigned Base;  // Filled with the base register of MI.
       int64_t Offset; // Filled with the offset of MI.
       unsigned DummyWidth;
 
       // Does it allow us to offset the base register and is the base SP?
       if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
-                                      Base != AArch64::SP)
+          Base != AArch64::SP)
         return MachineOutlinerInstrType::Illegal;
 
       // Find the minimum/maximum offset for this instruction and check if
@@ -4522,7 +4525,7 @@ AArch64InstrInfo::getOutliningType(Machi
       // This is tricky to test with IR tests, but when the outliner is moved
       // to a MIR test, it really ought to be checked.
       if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
-	return MachineOutlinerInstrType::Illegal;
+        return MachineOutlinerInstrType::Illegal;
 
       // It's in range, so we can outline it.
       return MachineOutlinerInstrType::Legal;
@@ -4558,7 +4561,7 @@ void AArch64InstrInfo::fixupPostOutline(
     // We've pushed the return address to the stack, so add 16 to the offset.
     // This is safe, since we already checked if it would overflow when we
     // checked if this instruction was legal to outline.
-    int64_t NewImm = (Offset + 16)/Scale;
+    int64_t NewImm = (Offset + 16) / Scale;
     StackOffsetOperand.setImm(NewImm);
   }
 }
@@ -4624,4 +4627,3 @@ MachineBasicBlock::iterator AArch64Instr
 
   return It;
 }
-
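
For context on how these hooks fit together: the benefit computation that
used to live here now happens in the outliner itself, which queries the two
overhead functions above. The following is only a rough sketch of that
combination, written to mirror the deleted getOutliningBenefit; the names
SequenceSize, OccurrenceCount, StartIt, and EndIt are illustrative, not
taken from MachineOutliner.cpp:

  // Sketch only (not the actual MachineOutliner.cpp code): compare the
  // size of the original copies against the outlined body, its frame
  // construction cost, and one call per occurrence.
  size_t NotOutlinedSize = SequenceSize * OccurrenceCount;
  size_t OutlinedSize =
      SequenceSize + TII.getOutliningFrameOverhead(StartIt, EndIt) +
      OccurrenceCount * TII.getOutliningCallOverhead(StartIt, EndIt);
  size_t Benefit =
      NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;

Plugging in the AArch64 values reproduces the old math: a tail call gives
SequenceSize + OccurrenceCount, and a non-tail candidate gives
(SequenceSize + 1) + 3 * OccurrenceCount.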

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=309356&r1=309355&r2=309356&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Thu Jul 27 20:21:58 2017
@@ -299,8 +299,10 @@ public:
   getSerializableMachineMemOperandTargetFlags() const override;
 
   bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
-  unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
-                               bool CanBeTailCall) const override;
+  size_t getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
+                                  MachineBasicBlock::iterator &EndIt) const override;
+  size_t getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt,
+                                   MachineBasicBlock::iterator &EndIt) const override;
   AArch64GenInstrInfo::MachineOutlinerInstrType
   getOutliningType(MachineInstr &MI) const override;
   void insertOutlinerEpilogue(MachineBasicBlock &MBB,

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=309356&r1=309355&r2=309356&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Jul 27 20:21:58 2017
@@ -10537,25 +10537,22 @@ char LDTLSCleanup::ID = 0;
 FunctionPass*
 llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
 
-unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize,
-                                           size_t Occurrences,
-                                           bool CanBeTailCall) const {
-  unsigned NotOutlinedSize = SequenceSize * Occurrences;
-  unsigned OutlinedSize;
+size_t X86InstrInfo::getOutliningCallOverhead(
+    MachineBasicBlock::iterator &StartIt,
+    MachineBasicBlock::iterator &EndIt) const {
+  // We just have to emit a call, so return 1.
+  return 1;
+}
 
-  // Is it a tail call?
-  if (CanBeTailCall) {
-    // If yes, we don't have to include a return instruction-- it's already in
-    // our sequence. So we have one occurrence of the sequence + #Occurrences
-    // calls.
-    OutlinedSize = SequenceSize + Occurrences;
-  } else {
-    // If not, add one for the return instruction.
-    OutlinedSize = (SequenceSize + 1) + Occurrences;
-  }
+size_t X86InstrInfo::getOutliningFrameOverhead(
+    MachineBasicBlock::iterator &StartIt,
+    MachineBasicBlock::iterator &EndIt) const {
+  // Is this a tail-call?
+  if (EndIt->isTerminator())
+    return 0; // Yes, so we already have a return.
 
-  // Return the number of instructions saved by outlining this sequence.
-  return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+  // No, so we have to add a return to the end.
+  return 1;
 }
 
 bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
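
A note on the asymmetry between the two targets: an x86 CALL pushes the
return address itself, so every occurrence costs a single instruction,
while a non-terminator AArch64 candidate must also save and restore LR
around the BL (hence 3, and hence the Offset + 16 fixup in AArch64's
fixupPostOutline earlier in this patch). A worked example under the sketch
given earlier, with illustrative numbers:

  // A 5-instruction candidate appearing 3 times, not at a block end:
  //   AArch64: 5*3 = 15 vs (5 + 1) + 3*3 = 15 -> benefit 0, not outlined.
  //   X86:     5*3 = 15 vs (5 + 1) + 3*1 = 9  -> benefit 6 instructions.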

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=309356&r1=309355&r2=309356&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Thu Jul 27 20:21:58 2017
@@ -566,9 +566,11 @@ public:
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableDirectMachineOperandTargetFlags() const override;
 
-  unsigned getOutliningBenefit(size_t SequenceSize,
-                               size_t Occurrences,
-                               bool CanBeTailCall) const override;
+  size_t getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
+                                  MachineBasicBlock::iterator &EndIt) const override;
+
+  size_t getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt,
+                                   MachineBasicBlock::iterator &EndIt) const override;
 
   bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
 