[llvm] r270559 - Rework/enhance stack coloring data flow analysis.
    Than McIntosh via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Tue May 24 06:23:45 PDT 2016
    
    
  
Author: thanm
Date: Tue May 24 08:23:44 2016
New Revision: 270559
URL: http://llvm.org/viewvc/llvm-project?rev=270559&view=rev
Log:
Rework/enhance stack coloring data flow analysis.
Replace bidirectional flow analysis to compute liveness with forward
analysis pass. Treat lifetimes as starting when there is a first
reference to the stack slot, as opposed to starting at the point of the
lifetime.start intrinsic, so as to increase the number of stack
variables we can overlap.
Reviewers: gbiv, qcolumbet, wmi
Differential Revision: http://reviews.llvm.org/D18827
Bug: 25776
Modified:
    llvm/trunk/lib/CodeGen/StackColoring.cpp
    llvm/trunk/test/CodeGen/X86/StackColoring.ll
    llvm/trunk/test/CodeGen/X86/misched-aa-colored.ll
Modified: llvm/trunk/lib/CodeGen/StackColoring.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/StackColoring.cpp?rev=270559&r1=270558&r2=270559&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/StackColoring.cpp (original)
+++ llvm/trunk/lib/CodeGen/StackColoring.cpp Tue May 24 08:23:44 2016
@@ -64,18 +64,180 @@ DisableColoring("no-stack-coloring",
 /// The user may write code that uses allocas outside of the declared lifetime
 /// zone. This can happen when the user returns a reference to a local
 /// data-structure. We can detect these cases and decide not to optimize the
-/// code. If this flag is enabled, we try to save the user.
+/// code. If this flag is enabled, we try to save the user. This option
+/// is treated as overriding LifetimeStartOnFirstUse below.
 static cl::opt<bool>
 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
                           cl::init(false), cl::Hidden,
                           cl::desc("Do not optimize lifetime zones that "
                                    "are broken"));
 
+/// Enable enhanced dataflow scheme for lifetime analysis (treat first
+/// use of stack slot as start of slot lifetime, as opposed to looking
+/// for LIFETIME_START marker). See "Implementation notes" below for
+/// more info.
+static cl::opt<bool>
+LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
+        cl::init(true), cl::Hidden,
+        cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
+
+
 STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
 
+//
+// Implementation Notes:
+// ---------------------
+//
+// Consider the following motivating example:
+//
+//     int foo() {
+//       char b1[1024], b2[1024];
+//       if (...) {
+//         char b3[1024];
+//         <uses of b1, b3>;
+//         return x;
+//       } else {
+//         char b4[1024], b5[1024];
+//         <uses of b2, b4, b5>;
+//         return y;
+//       }
+//     }
+//
+// In the code above, "b3" and "b4" are declared in distinct lexical
+// scopes, meaning that it is easy to prove that they can share the
+// same stack slot. Variables "b1" and "b2" are declared in the same
+// scope, meaning that from a lexical point of view, their lifetimes
+// overlap. From a control flow pointer of view, however, the two
+// variables are accessed in disjoint regions of the CFG, thus it
+// should be possible for them to share the same stack slot. An ideal
+// stack allocation for the function above would look like:
+//
+//     slot 0: b1, b2
+//     slot 1: b3, b4
+//     slot 2: b5
+//
+// Achieving this allocation is tricky, however, due to the way
+// lifetime markers are inserted. Here is a simplified view of the
+// control flow graph for the code above:
+//
+//                +------  block 0 -------+
+//               0| LIFETIME_START b1, b2 |
+//               1| <test 'if' condition> |
+//                +-----------------------+
+//                   ./              \.
+//   +------  block 1 -------+   +------  block 2 -------+
+//  2| LIFETIME_START b3     |  5| LIFETIME_START b4, b5 |
+//  3| <uses of b1, b3>      |  6| <uses of b2, b4, b5>  |
+//  4| LIFETIME_END b3       |  7| LIFETIME_END b4, b5   |
+//   +-----------------------+   +-----------------------+
+//                   \.              /.
+//                +------  block 3 -------+
+//               8| <cleanupcode>         |
+//               9| LIFETIME_END b1, b2   |
+//              10| return                |
+//                +-----------------------+
+//
+// If we create live intervals for the variables above strictly based
+// on the lifetime markers, we'll get the set of intervals on the
+// left. If we ignore the lifetime start markers and instead treat a
+// variable's lifetime as beginning with the first reference to the
+// var, then we get the intervals on the right.
+//
+//            LIFETIME_START      First Use
+//     b1:    [0,9]               [3,4] [8,9]
+//     b2:    [0,9]               [6,9]
+//     b3:    [2,4]               [3,4]
+//     b4:    [5,7]               [6,7]
+//     b5:    [5,7]               [6,7]
+//
+// For the intervals on the left, the best we can do is overlap two
+// variables (b3 and b4, for example); this gives us a stack size of
+// 4*1024 bytes, not ideal. When treating first-use as the start of a
+// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
+// byte stack (better).
+//
+// Relying entirely on first-use of stack slots is problematic,
+// however, due to the fact that optimizations can sometimes migrate
+// uses of a variable outside of its lifetime start/end region. Here
+// is an example:
+//
+//     int bar() {
+//       char b1[1024], b2[1024];
+//       if (...) {
+//         <uses of b2>
+//         return y;
+//       } else {
+//         <uses of b1>
+//         while (...) {
+//           char b3[1024];
+//           <uses of b3>
+//         }
+//       }
+//     }
+//
+// Before optimization, the control flow graph for the code above
+// might look like the following:
+//
+//                +------  block 0 -------+
+//               0| LIFETIME_START b1, b2 |
+//               1| <test 'if' condition> |
+//                +-----------------------+
+//                   ./              \.
+//   +------  block 1 -------+    +------- block 2 -------+
+//  2| <uses of b2>          |   3| <uses of b1>          |
+//   +-----------------------+    +-----------------------+
+//              |                            |
+//              |                 +------- block 3 -------+ <-\.
+//              |                4| <while condition>     |    |
+//              |                 +-----------------------+    |
+//              |               /          |                   |
+//              |              /  +------- block 4 -------+
+//              \             /  5| LIFETIME_START b3     |    |
+//               \           /   6| <uses of b3>          |    |
+//                \         /    7| LIFETIME_END b3       |    |
+//                 \        |    +------------------------+    |
+//                  \       |                 \                /
+//                +------  block 5 -----+      \---------------
+//               8| <cleanupcode>       |
+//               9| LIFETIME_END b1, b2 |
+//              10| return              |
+//                +---------------------+
+//
+// During optimization, however, it can happen that an instruction
+// computing an address in "b3" (for example, a loop-invariant GEP) is
+// hoisted up out of the loop from block 4 to block 2.  [Note that
+// this is not an actual load from the stack, only an instruction that
+// computes the address to be loaded]. If this happens, there is now a
+// path leading from the first use of b3 to the return instruction
+// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
+// now larger than if we were computing live intervals strictly based
+// on lifetime markers. In the example above, this lengthened lifetime
+// would mean that it would appear illegal to overlap b3 with b2.
+//
+// To deal with this such cases, the code in ::collectMarkers() below
+// tries to identify "degenerate" slots -- those slots where on a single
+// forward pass through the CFG we encounter a first reference to slot
+// K before we hit the slot K lifetime start marker. For such slots,
+// we fall back on using the lifetime start marker as the beginning of
+// the variable's lifetime.  NB: with this implementation, slots can
+// appear degenerate in cases where there is unstructured control flow:
+//
+//    if (q) goto mid;
+//    if (x > 9) {
+//         int b[100];
+//         memcpy(&b[0], ...);
+//    mid: b[k] = ...;
+//         abc(&b);
+//    }
+//
+// If in RPO ordering chosen to walk the CFG  we happen to visit the b[k]
+// before visiting the memcpy block (which will contain the lifetime start
+// for "b" then it will appear that 'b' has a degenerate lifetime.
+//
+
 //===----------------------------------------------------------------------===//
 //                           StackColoring Pass
 //===----------------------------------------------------------------------===//
@@ -123,6 +285,17 @@ class StackColoring : public MachineFunc
   /// once the coloring is done.
   SmallVector<MachineInstr*, 8> Markers;
 
+  /// Record the FI slots for which we have seen some sort of
+  /// lifetime marker (either start or end).
+  BitVector InterestingSlots;
+
+  /// Degenerate slots -- first use appears outside of start/end
+  /// lifetime markers.
+  BitVector DegenerateSlots;
+
+  /// Number of iterations taken during data flow analysis.
+  unsigned NumIterations;
+
 public:
   static char ID;
   StackColoring() : MachineFunctionPass(ID) {
@@ -153,6 +326,25 @@ private:
   /// in and out blocks.
   void calculateLocalLiveness();
 
+  /// Returns TRUE if we're using the first-use-begins-lifetime method for
+  /// this slot (if FALSE, then the start marker is treated as start of lifetime).
+  bool applyFirstUse(int Slot) {
+    if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
+      return false;
+    if (DegenerateSlots.test(Slot))
+      return false;
+    return true;
+  }
+
+  /// Examines the specified instruction and returns TRUE if the instruction
+  /// represents the start or end of an interesting lifetime. The slot or slots
+  /// starting or ending are added to the vector "slots" and "isStart" is set
+  /// accordingly.
+  /// \returns True if inst contains a lifetime start or end
+  bool isLifetimeStartOrEnd(const MachineInstr &MI,
+                            SmallVector<int, 4> &slots,
+                            bool &isStart);
+
   /// Construct the LiveIntervals for the slots.
   void calculateLiveIntervals(unsigned NumSlots);
 
@@ -170,7 +362,10 @@ private:
 
   /// Map entries which point to other entries to their destination.
   ///   A->B->C becomes A->C.
-   void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+
+  /// Used in collectMarkers
+  typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap;
 };
 } // end anonymous namespace
 
@@ -228,9 +423,140 @@ LLVM_DUMP_METHOD void StackColoring::dum
 
 #endif // not NDEBUG
 
-unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+static inline int getStartOrEndSlot(const MachineInstr &MI)
+{
+  assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+          MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
+         "Expected LIFETIME_START or LIFETIME_END op");
+  const MachineOperand &MO = MI.getOperand(0);
+  int Slot = MO.getIndex();
+  if (Slot >= 0)
+    return Slot;
+  return -1;
+}
+
+//
+// At the moment the only way to end a variable lifetime is with
+// a VARIABLE_LIFETIME op (which can't contain a start). If things
+// change and the IR allows for a single inst that both begins
+// and ends lifetime(s), this interface will need to be reworked.
+//
+bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
+                                         SmallVector<int, 4> &slots,
+                                         bool &isStart)
+{
+  if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+      MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+    int Slot = getStartOrEndSlot(MI);
+    if (Slot < 0)
+      return false;
+    if (!InterestingSlots.test(Slot))
+      return false;
+    slots.push_back(Slot);
+    if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+      isStart = false;
+      return true;
+    }
+    if (! applyFirstUse(Slot)) {
+      isStart = true;
+      return true;
+    }
+  } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) {
+    if (! MI.isDebugValue()) {
+      bool found = false;
+      for (const MachineOperand &MO : MI.operands()) {
+        if (!MO.isFI())
+          continue;
+        int Slot = MO.getIndex();
+        if (Slot<0)
+          continue;
+        if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
+          slots.push_back(Slot);
+          found = true;
+        }
+      }
+      if (found) {
+        isStart = true;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot)
+{
   unsigned MarkersFound = 0;
-  // Scan the function to find all lifetime markers.
+  BlockBitVecMap SeenStartMap;
+  InterestingSlots.clear();
+  InterestingSlots.resize(NumSlot);
+  DegenerateSlots.clear();
+  DegenerateSlots.resize(NumSlot);
+
+  // Step 1: collect markers and populate the "InterestingSlots"
+  // and "DegenerateSlots" sets.
+  for (MachineBasicBlock *MBB : depth_first(MF)) {
+
+    // Compute the set of slots for which we've seen a START marker but have
+    // not yet seen an END marker at this point in the walk (e.g. on entry
+    // to this bb).
+    BitVector BetweenStartEnd;
+    BetweenStartEnd.resize(NumSlot);
+    for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+             PE = MBB->pred_end(); PI != PE; ++PI) {
+      BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
+      if (I != SeenStartMap.end()) {
+        BetweenStartEnd |= I->second;
+      }
+    }
+
+    // Walk the instructions in the block to look for start/end ops.
+    for (MachineInstr &MI : *MBB) {
+      if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+          MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+        int Slot = getStartOrEndSlot(MI);
+        if (Slot < 0)
+          continue;
+        InterestingSlots.set(Slot);
+        if (MI.getOpcode() == TargetOpcode::LIFETIME_START)
+          BetweenStartEnd.set(Slot);
+        else
+          BetweenStartEnd.reset(Slot);
+        const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+        if (Allocation) {
+          DEBUG(dbgs() << "Found a lifetime ");
+          DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
+                               ? "start"
+                               : "end"));
+          DEBUG(dbgs() << " marker for slot #" << Slot);
+          DEBUG(dbgs() << " with allocation: " << Allocation->getName()
+                       << "\n");
+        }
+        Markers.push_back(&MI);
+        MarkersFound += 1;
+      } else {
+        for (const MachineOperand &MO : MI.operands()) {
+          if (!MO.isFI())
+            continue;
+          int Slot = MO.getIndex();
+          if (Slot < 0)
+            continue;
+          if (! BetweenStartEnd.test(Slot)) {
+            DegenerateSlots.set(Slot);
+          }
+        }
+      }
+    }
+    BitVector &SeenStart = SeenStartMap[MBB];
+    SeenStart |= BetweenStartEnd;
+  }
+  if (!MarkersFound) {
+    return 0;
+  }
+  DEBUG(dumpBV("Degenerate slots", DegenerateSlots));
+
+  // Step 2: compute begin/end sets for each block
+
   // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
   // deterministic numbering, and because we'll need a post-order iteration
   // later for solving the liveness dataflow problem.
@@ -246,37 +572,33 @@ unsigned StackColoring::collectMarkers(u
     BlockInfo.Begin.resize(NumSlot);
     BlockInfo.End.resize(NumSlot);
 
+    SmallVector<int, 4> slots;
     for (MachineInstr &MI : *MBB) {
-      if (MI.getOpcode() != TargetOpcode::LIFETIME_START &&
-          MI.getOpcode() != TargetOpcode::LIFETIME_END)
-        continue;
-
-      bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START;
-      const MachineOperand &MO = MI.getOperand(0);
-      int Slot = MO.getIndex();
-      if (Slot < 0)
-        continue;
-
-      Markers.push_back(&MI);
-
-      MarkersFound++;
-
-      const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
-      if (Allocation) {
-        DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
-              " with allocation: "<< Allocation->getName()<<"\n");
-      }
-
-      if (IsStart) {
-        BlockInfo.Begin.set(Slot);
-      } else {
-        if (BlockInfo.Begin.test(Slot)) {
-          // Allocas that start and end within a single block are handled
-          // specially when computing the LiveIntervals to avoid pessimizing
-          // the liveness propagation.
-          BlockInfo.Begin.reset(Slot);
-        } else {
+      bool isStart = false;
+      slots.clear();
+      if (isLifetimeStartOrEnd(MI, slots, isStart)) {
+        if (!isStart) {
+          assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
+          int Slot = slots[0];
+          if (BlockInfo.Begin.test(Slot)) {
+            BlockInfo.Begin.reset(Slot);
+          }
           BlockInfo.End.set(Slot);
+        } else {
+          for (auto Slot : slots) {
+            DEBUG(dbgs() << "Found a use of slot #" << Slot);
+            DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index ");
+            DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
+            const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+            if (Allocation) {
+              DEBUG(dbgs() << " with allocation: "<< Allocation->getName());
+            }
+            DEBUG(dbgs() << "\n");
+            if (BlockInfo.End.test(Slot)) {
+              BlockInfo.End.reset(Slot);
+            }
+            BlockInfo.Begin.set(Slot);
+          }
         }
       }
     }
@@ -287,90 +609,56 @@ unsigned StackColoring::collectMarkers(u
   return MarkersFound;
 }
 
-void StackColoring::calculateLocalLiveness() {
-  // Perform a standard reverse dataflow computation to solve for
-  // global liveness.  The BEGIN set here is equivalent to KILL in the standard
-  // formulation, and END is equivalent to GEN.  The result of this computation
-  // is a map from blocks to bitvectors where the bitvectors represent which
-  // allocas are live in/out of that block.
-  SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
-                                                 BasicBlockNumbering.end());
-  unsigned NumSSMIters = 0;
+void StackColoring::calculateLocalLiveness()
+{
+  unsigned NumIters = 0;
   bool changed = true;
   while (changed) {
     changed = false;
-    ++NumSSMIters;
-
-    SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
+    ++NumIters;
 
     for (const MachineBasicBlock *BB : BasicBlockNumbering) {
-      if (!BBSet.count(BB)) continue;
 
       // Use an iterator to avoid repeated lookups.
       LivenessMap::iterator BI = BlockLiveness.find(BB);
       assert(BI != BlockLiveness.end() && "Block not found");
       BlockLifetimeInfo &BlockInfo = BI->second;
 
+      // Compute LiveIn by unioning together the LiveOut sets of all preds.
       BitVector LocalLiveIn;
-      BitVector LocalLiveOut;
-
-      // Forward propagation from begins to ends.
       for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
            PE = BB->pred_end(); PI != PE; ++PI) {
         LivenessMap::const_iterator I = BlockLiveness.find(*PI);
         assert(I != BlockLiveness.end() && "Predecessor not found");
         LocalLiveIn |= I->second.LiveOut;
       }
-      LocalLiveIn |= BlockInfo.End;
-      LocalLiveIn.reset(BlockInfo.Begin);
-
-      // Reverse propagation from ends to begins.
-      for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
-           SE = BB->succ_end(); SI != SE; ++SI) {
-        LivenessMap::const_iterator I = BlockLiveness.find(*SI);
-        assert(I != BlockLiveness.end() && "Successor not found");
-        LocalLiveOut |= I->second.LiveIn;
-      }
-      LocalLiveOut |= BlockInfo.Begin;
-      LocalLiveOut.reset(BlockInfo.End);
-
-      LocalLiveIn |= LocalLiveOut;
-      LocalLiveOut |= LocalLiveIn;
 
-      // After adopting the live bits, we need to turn-off the bits which
-      // are de-activated in this block.
+      // Compute LiveOut by subtracting out lifetimes that end in this
+      // block, then adding in lifetimes that begin in this block.  If
+      // we have both BEGIN and END markers in the same basic block
+      // then we know that the BEGIN marker comes after the END,
+      // because we already handle the case where the BEGIN comes
+      // before the END when collecting the markers (and building the
+      // BEGIN/END vectors).
+      BitVector LocalLiveOut = LocalLiveIn;
       LocalLiveOut.reset(BlockInfo.End);
-      LocalLiveIn.reset(BlockInfo.Begin);
-
-      // If we have both BEGIN and END markers in the same basic block then
-      // we know that the BEGIN marker comes after the END, because we already
-      // handle the case where the BEGIN comes before the END when collecting
-      // the markers (and building the BEGIN/END vectore).
-      // Want to enable the LIVE_IN and LIVE_OUT of slots that have both
-      // BEGIN and END because it means that the value lives before and after
-      // this basic block.
-      BitVector LocalEndBegin = BlockInfo.End;
-      LocalEndBegin &= BlockInfo.Begin;
-      LocalLiveIn |= LocalEndBegin;
-      LocalLiveOut |= LocalEndBegin;
+      LocalLiveOut |= BlockInfo.Begin;
 
+      // Update block LiveIn set, noting whether it has changed.
       if (LocalLiveIn.test(BlockInfo.LiveIn)) {
         changed = true;
         BlockInfo.LiveIn |= LocalLiveIn;
-
-        NextBBSet.insert(BB->pred_begin(), BB->pred_end());
       }
 
+      // Update block LiveOut set, noting whether it has changed.
       if (LocalLiveOut.test(BlockInfo.LiveOut)) {
         changed = true;
         BlockInfo.LiveOut |= LocalLiveOut;
-
-        NextBBSet.insert(BB->succ_begin(), BB->succ_end());
       }
     }
-
-    BBSet = std::move(NextBBSet);
   }// while changed.
+
+  NumIterations = NumIters;
 }
 
 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
@@ -385,29 +673,22 @@ void StackColoring::calculateLiveInterva
     Finishes.clear();
     Finishes.resize(NumSlots);
 
-    // Create the interval for the basic blocks with lifetime markers in them.
-    for (const MachineInstr *MI : Markers) {
-      if (MI->getParent() != &MBB)
-        continue;
+    // Create the interval for the basic blocks containing lifetime begin/end.
+    for (const MachineInstr &MI : MBB) {
 
-      assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
-              MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
-             "Invalid Lifetime marker");
-
-      bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
-      const MachineOperand &Mo = MI->getOperand(0);
-      int Slot = Mo.getIndex();
-      if (Slot < 0)
+      SmallVector<int, 4> slots;
+      bool IsStart = false;
+      if (!isLifetimeStartOrEnd(MI, slots, IsStart))
         continue;
-
-      SlotIndex ThisIndex = Indexes->getInstructionIndex(*MI);
-
-      if (IsStart) {
-        if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
-          Starts[Slot] = ThisIndex;
-      } else {
-        if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
-          Finishes[Slot] = ThisIndex;
+      SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+      for (auto Slot : slots) {
+        if (IsStart) {
+          if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+            Starts[Slot] = ThisIndex;
+        } else {
+          if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+            Finishes[Slot] = ThisIndex;
+        }
       }
     }
 
@@ -423,7 +704,29 @@ void StackColoring::calculateLiveInterva
     }
 
     for (unsigned i = 0; i < NumSlots; ++i) {
-      assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range");
+      //
+      // When LifetimeStartOnFirstUse is turned on, data flow analysis
+      // is forward (from starts to ends), not bidirectional. A
+      // consequence of this is that we can wind up in situations
+      // where Starts[i] is invalid but Finishes[i] is valid and vice
+      // versa. Example:
+      //
+      //     LIFETIME_START x
+      //     if (...) {
+      //       <use of x>
+      //       throw ...;
+      //     }
+      //     LIFETIME_END x
+      //     return 2;
+      //
+      //
+      // Here the slot for "x" will not be live into the block
+      // containing the "return 2" (since lifetimes start with first
+      // use, not at the dominating LIFETIME_START marker).
+      //
+      if (Starts[i].isValid() && !Finishes[i].isValid()) {
+        Finishes[i] = Indexes->getMBBEndIdx(&MBB);
+      }
       if (!Starts[i].isValid())
         continue;
 
@@ -684,7 +987,6 @@ bool StackColoring::runOnMachineFunction
     return false;
 
   SmallVector<int, 8> SortedSlots;
-
   SortedSlots.reserve(NumSlots);
   Intervals.reserve(NumSlots);
 
@@ -717,9 +1019,12 @@ bool StackColoring::runOnMachineFunction
 
   // Calculate the liveness of each block.
   calculateLocalLiveness();
+  DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
+  DEBUG(dump());
 
   // Propagate the liveness information.
   calculateLiveIntervals(NumSlots);
+  DEBUG(dumpIntervals());
 
   // Search for allocas which are used outside of the declared lifetime
   // markers.
Modified: llvm/trunk/test/CodeGen/X86/StackColoring.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/StackColoring.ll?rev=270559&r1=270558&r2=270559&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/StackColoring.ll (original)
+++ llvm/trunk/test/CodeGen/X86/StackColoring.ll Tue May 24 08:23:44 2016
@@ -87,7 +87,7 @@ bb3:
 }
 
 ;CHECK-LABEL: myCall_w4:
-;YESCOLOR: subq  $200, %rsp
+;YESCOLOR: subq  $120, %rsp
 ;NOCOLOR: subq  $408, %rsp
 
 define i32 @myCall_w4(i32 %in) {
@@ -217,7 +217,7 @@ bb3:
 
 
 ;CHECK-LABEL: myCall2_nostart:
-;YESCOLOR: subq  $144, %rsp
+;YESCOLOR: subq  $272, %rsp
 ;NOCOLOR: subq  $272, %rsp
 define i32 @myCall2_nostart(i32 %in, i1 %d) {
 entry:
@@ -425,6 +425,120 @@ define i32 @shady_range(i32 %argc, i8**
   ret i32 9
 }
 
+; In this case 'itar1' and 'itar2' can't be overlapped if we treat
+; lifetime.start as the beginning of the lifetime, but we can
+; overlap if we consider first use of the slot as lifetime
+; start. See llvm bug 25776.
+
+;CHECK-LABEL: ifthen_twoslots:
+;YESCOLOR: subq  $536, %rsp
+;NOCOLOR: subq  $1048, %rsp
+
+define i32 @ifthen_twoslots(i32 %x) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  %itar1 = alloca [128 x i32], align 16
+  %itar2 = alloca [128 x i32], align 16
+  %cleanup.dest.slot = alloca i32
+  store i32 %x, i32* %x.addr, align 4
+  %itar1_start_8 = bitcast [128 x i32]* %itar1 to i8*
+  call void @llvm.lifetime.start(i64 512, i8* %itar1_start_8) #3
+  %itar2_start_8 = bitcast [128 x i32]* %itar2 to i8*
+  call void @llvm.lifetime.start(i64 512, i8* %itar2_start_8) #3
+  %xval = load i32, i32* %x.addr, align 4
+  %and = and i32 %xval, 1
+  %tobool = icmp ne i32 %and, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %arraydecay = getelementptr inbounds [128 x i32], [128 x i32]* %itar1, i32 0, i32 0
+  call void @inita(i32* %arraydecay)
+  store i32 1, i32* %retval, align 4
+  store i32 1, i32* %cleanup.dest.slot, align 4
+  %itar2_end_8 = bitcast [128 x i32]* %itar2 to i8*
+  call void @llvm.lifetime.end(i64 512, i8* %itar2_end_8) #3
+  %itar1_end_8 = bitcast [128 x i32]* %itar1 to i8*
+  call void @llvm.lifetime.end(i64 512, i8* %itar1_end_8) #3
+  br label %cleanup
+
+if.else:                                          ; preds = %entry
+  %arraydecay1 = getelementptr inbounds [128 x i32], [128 x i32]* %itar2, i32 0, i32 0
+  call void @inita(i32* %arraydecay1)
+  store i32 0, i32* %retval, align 4
+  store i32 1, i32* %cleanup.dest.slot, align 4
+  %itar2_end2_8 = bitcast [128 x i32]* %itar2 to i8*
+  call void @llvm.lifetime.end(i64 512, i8* %itar2_end2_8) #3
+  %itar1_end2_8 = bitcast [128 x i32]* %itar1 to i8*
+  call void @llvm.lifetime.end(i64 512, i8* %itar1_end2_8) #3
+  br label %cleanup
+
+cleanup:                                          ; preds = %if.else, %if.then
+  %final_retval = load i32,
+ i32* %retval, align 4
+  ret i32 %final_retval
+}
+
+; This function is intended to test the case where you
+; have a reference to a stack slot that lies outside of
+; the START/END lifetime markers-- the flow analysis
+; should catch this and build the lifetime based on the
+; markers only.
+
+;CHECK-LABEL: while_loop:
+;YESCOLOR: subq  $1032, %rsp
+;NOCOLOR: subq  $1544, %rsp
+
+define i32 @while_loop(i32 %x) #0 {
+entry:
+  %b1 = alloca [128 x i32], align 16
+  %b2 = alloca [128 x i32], align 16
+  %b3 = alloca [128 x i32], align 16
+  %tmp = bitcast [128 x i32]* %b1 to i8*
+  call void @llvm.lifetime.start(i64 512, i8* %tmp) #3
+  %tmp1 = bitcast [128 x i32]* %b2 to i8*
+  call void @llvm.lifetime.start(i64 512, i8* %tmp1) #3
+  %and = and i32 %x, 1
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %arraydecay = getelementptr inbounds [128 x i32], [128 x i32]* %b2, i64 0, i64 0
+  call void @inita(i32* %arraydecay) #3
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %arraydecay1 = getelementptr inbounds [128 x i32], [128 x i32]* %b1, i64 0, i64 0
+  call void @inita(i32* %arraydecay1) #3
+  %arraydecay3 = getelementptr inbounds [128 x i32], [128 x i32]* %b3, i64 0, i64 0
+  call void @inita(i32* %arraydecay3) #3
+  %tobool25 = icmp eq i32 %x, 0
+  br i1 %tobool25, label %if.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %if.else
+  %tmp2 = bitcast [128 x i32]* %b3 to i8*
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.lr.ph, %while.body
+  %x.addr.06 = phi i32 [ %x, %while.body.lr.ph ], [ %dec, %while.body ]
+  %dec = add nsw i32 %x.addr.06, -1
+  call void @llvm.lifetime.start(i64 512, i8* %tmp2) #3
+  call void @inita(i32* %arraydecay3) #3
+  call void @llvm.lifetime.end(i64 512, i8* %tmp2) #3
+  %tobool2 = icmp eq i32 %dec, 0
+  br i1 %tobool2, label %if.end.loopexit, label %while.body
+
+if.end.loopexit:                                  ; preds = %while.body
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.loopexit, %if.else, %if.then
+  call void @llvm.lifetime.end(i64 512, i8* %tmp1) #3
+  call void @llvm.lifetime.end(i64 512, i8* %tmp) #3
+  ret i32 0
+}
+
+declare void @inita(i32*) #2
+
 declare void @bar([100 x i32]* , [100 x i32]*) nounwind
 
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
Modified: llvm/trunk/test/CodeGen/X86/misched-aa-colored.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/misched-aa-colored.ll?rev=270559&r1=270558&r2=270559&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/misched-aa-colored.ll (original)
+++ llvm/trunk/test/CodeGen/X86/misched-aa-colored.ll Tue May 24 08:23:44 2016
@@ -155,6 +155,7 @@ entry:
   %ref.tmp.i = alloca %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", align 8
   %Op.i = alloca %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083", align 8
   %0 = bitcast %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i to i8*
+  call void @llvm.lifetime.start(i64 24, i8* %0) #1
   %retval.sroa.0.0.idx.i36 = getelementptr inbounds %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i, i64 0, i32 1, i32 0, i32 0
   %retval.sroa.0.0.copyload.i37 = load i32, i32* %retval.sroa.0.0.idx.i36, align 8
   call void @llvm.lifetime.end(i64 24, i8* %0) #1
    
    
More information about the llvm-commits
mailing list