[llvm-commits] [llvm] r164074 - in /llvm/trunk: include/llvm/CodeGen/LiveIntervalAnalysis.h lib/CodeGen/LiveIntervalAnalysis.cpp lib/CodeGen/RegisterCoalescer.cpp test/CodeGen/ARM/coalesce-subregs.ll

Jakob Stoklund Olesen stoklund at 2pi.dk
Mon Sep 17 16:03:26 PDT 2012


Author: stoklund
Date: Mon Sep 17 18:03:25 2012
New Revision: 164074

URL: http://llvm.org/viewvc/llvm-project?rev=164074&view=rev
Log:
Merge into undefined lanes under -new-coalescer.

Add LIS::pruneValue() and extendToIndices(). These two functions are
used by the register coalescer when merging two live ranges requires
more than a trivial value mapping as supported by LiveInterval::join().

The pruneValue() function can remove the part of a value number that is
going to conflict in join(). Afterwards, extendToIndices can restore the
live range, using any new dominating value numbers and updating the SSA
form.

Use this complex value mapping to support merging a register into a
vector lane that has a conflicting value, but the clobbered lane is
undef.

Modified:
    llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h
    llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp
    llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
    llvm/trunk/test/CodeGen/ARM/coalesce-subregs.ll

Modified: llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h?rev=164074&r1=164073&r2=164074&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h (original)
+++ llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h Mon Sep 17 18:03:25 2012
@@ -165,6 +165,26 @@
     bool shrinkToUses(LiveInterval *li,
                       SmallVectorImpl<MachineInstr*> *dead = 0);
 
+    /// extendToIndices - Extend the live range of LI to reach all points in
+    /// Indices. The points in the Indices array must be jointly dominated by
+    /// existing defs in LI. PHI-defs are added as needed to maintain SSA form.
+    ///
+    /// If a SlotIndex in Indices is the end index of a basic block, LI will be
+    /// extended to be live out of the basic block.
+    ///
+    /// See also LiveRangeCalc::extend().
+    void extendToIndices(LiveInterval *LI, ArrayRef<SlotIndex> Indices);
+
+    /// pruneValue - If an LI value is live at Kill, prune its live range by
+    /// removing any liveness reachable from Kill. Add live range end points to
+    /// EndPoints such that extendToIndices(LI, EndPoints) will reconstruct the
+    /// value's live range.
+    ///
+    /// Calling pruneValue() and extendToIndices() can be used to reconstruct
+    /// SSA form after adding defs to a virtual register.
+    void pruneValue(LiveInterval *LI, SlotIndex Kill,
+                    SmallVectorImpl<SlotIndex> *EndPoints);
+
     SlotIndexes *getSlotIndexes() const {
       return Indexes;
     }

Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=164074&r1=164073&r2=164074&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original)
+++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Mon Sep 17 18:03:25 2012
@@ -731,6 +731,73 @@
   return CanSeparate;
 }
 
+void LiveIntervals::extendToIndices(LiveInterval *LI,
+                                    ArrayRef<SlotIndex> Indices) {
+  assert(LRCalc && "LRCalc not initialized.");
+  LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+  for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+    LRCalc->extend(LI, Indices[i]);
+}
+
+void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+                               SmallVectorImpl<SlotIndex> *EndPoints) {
+  LiveRangeQuery LRQ(*LI, Kill);
+  assert (!LRQ.valueDefined() && "Can't prune value at the defining instr");
+
+  // Also can't prune a value that isn't there.
+  VNInfo *VNI = LRQ.valueOut();
+  if (!VNI)
+    return;
+
+  MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+  SlotIndex MBBStart, MBBEnd;
+  tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+
+  // If VNI isn't live out from KillMBB, the value is trivially pruned.
+  if (LRQ.endPoint() < MBBEnd) {
+    LI->removeRange(Kill, LRQ.endPoint());
+    if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+    return;
+  }
+
+  // VNI is live out of KillMBB.
+  LI->removeRange(Kill, MBBEnd);
+  if (EndPoints) EndPoints->push_back(MBBEnd);
+
+  // Find all blocks that are reachable from MBB without leaving VNI's live
+  // range.
+  for (df_iterator<MachineBasicBlock*>
+       I = df_begin(KillMBB), E = df_end(KillMBB); I != E;) {
+    MachineBasicBlock *MBB = *I;
+    // KillMBB itself was already handled.
+    if (MBB == KillMBB) {
+      ++I;
+      continue;
+    }
+
+    // Check if VNI is live in to MBB.
+    tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+    LiveRangeQuery LRQ(*LI, MBBStart);
+    if (LRQ.valueIn() != VNI) {
+      // This block isn't part of the VNI live range. Prune the search.
+      I.skipChildren();
+      continue;
+    }
+
+    // Prune the search if VNI is killed in MBB.
+    if (LRQ.endPoint() < MBBEnd) {
+      LI->removeRange(MBBStart, LRQ.endPoint());
+      if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+      I.skipChildren();
+      continue;
+    }
+
+    // VNI is live through MBB.
+    LI->removeRange(MBBStart, MBBEnd);
+    if (EndPoints) EndPoints->push_back(MBBEnd);
+    ++I;
+  }
+}
 
 //===----------------------------------------------------------------------===//
 // Register allocator hooks.

Modified: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp?rev=164074&r1=164073&r2=164074&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp Mon Sep 17 18:03:25 2012
@@ -1277,6 +1277,11 @@
   /// Returns false if any conflicts were impossible to resolve.
   bool resolveConflicts(JoinVals &Other);
 
+  /// Prune the live range of values in Other.LI where they would conflict with
+  /// CR_Replace values in LI. Collect end points for restoring the live range
+  /// after joining.
+  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+
   /// Erase any machine instructions that have been coalesced away.
   /// Add erased instructions to ErasedInstrs.
   /// Add foreign virtual registers to ShrinkRegs if their live range ended at
@@ -1421,6 +1426,7 @@
   // We have overlapping values, or possibly a kill of Other.
   // Recursively compute assignments up the dominator tree.
   Other.computeAssignment(V.OtherVNI->id, *this);
+  const Val &OtherV = Other.Vals[V.OtherVNI->id];
 
   // Don't attempt resolving PHI values for now.
   if (VNI->isPHIDef())
@@ -1435,7 +1441,7 @@
   if (CP.isCoalescable(DefMI)) {
     // Some of the lanes copied from OtherVNI may be undef, making them undef
     // here too.
-    V.ValidLanes &= ~V.WriteLanes | Other.Vals[V.OtherVNI->id].ValidLanes;
+    V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
     return CR_Erase;
   }
 
@@ -1453,7 +1459,23 @@
       stripCopies(VNI) == stripCopies(V.OtherVNI))
     return CR_Erase;
 
-  // FIXME: Identify CR_Replace opportunities.
+  // If the lanes written by this instruction were all undef in OtherVNI, it is
+  // still safe to join the live ranges. This can't be done with a simple value
+  // mapping, though - OtherVNI will map to multiple values:
+  //
+  //   1 %dst:ssub0 = FOO                <-- OtherVNI
+  //   2 %src = BAR                      <-- VNI
+  //   3 %dst:ssub1 = COPY %src<kill>    <-- Eliminate this copy.
+  //   4 BAZ %dst<kill>
+  //   5 QUUX %src<kill>
+  //
+  // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+  // handles this complex value mapping.
+  if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+    return CR_Replace;
+
+  // FIXME: Identify CR_Replace opportunities where the clobbered lanes are
+  // dead.
   return CR_Impossible;
 }
 
@@ -1514,6 +1536,18 @@
   return true;
 }
 
+void JoinVals::pruneValues(JoinVals &Other,
+                           SmallVectorImpl<SlotIndex> &EndPoints) {
+  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+    if (Vals[i].Resolution != CR_Replace)
+      continue;
+    SlotIndex Def = LI.getValNumInfo(i)->def;
+    LIS->pruneValue(&Other.LI, Def, &EndPoints);
+    DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+                 << ": " << Other.LI << '\n');
+  }
+}
+
 void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
                            SmallVectorImpl<unsigned> &ShrinkRegs) {
   for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
@@ -1557,6 +1591,14 @@
 
   // All clear, the live ranges can be merged.
 
+  // The merging algorithm in LiveInterval::join() can't handle conflicting
+  // value mappings, so we need to remove any live ranges that overlap a
+  // CR_Replace resolution. Collect a set of end points that can be used to
+  // restore the live range after joining.
+  SmallVector<SlotIndex, 8> EndPoints;
+  LHSVals.pruneValues(RHSVals, EndPoints);
+  RHSVals.pruneValues(LHSVals, EndPoints);
+
   // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
   // registers to require trimming.
   SmallVector<unsigned, 8> ShrinkRegs;
@@ -1574,6 +1616,15 @@
   // ranges. They are reinserted after register allocation.
   MRI->clearKillFlags(LHS.reg);
   MRI->clearKillFlags(RHS.reg);
+
+  if (EndPoints.empty())
+    return true;
+
+  // Recompute the parts of the live range we had to remove because of
+  // CR_Replace conflicts.
+  DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+               << " points: " << LHS << '\n');
+  LIS->extendToIndices(&LHS, EndPoints);
   return true;
 }
 

Modified: llvm/trunk/test/CodeGen/ARM/coalesce-subregs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/coalesce-subregs.ll?rev=164074&r1=164073&r2=164074&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/coalesce-subregs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/coalesce-subregs.ll Mon Sep 17 18:03:25 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a9 -new-coalescer | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios0.0.0"
 
@@ -66,3 +66,51 @@
 
 declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
 declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+
+; CHECK: f3
+; This function has lane insertions that span basic blocks.
+; The trivial REG_SEQUENCE lowering can't handle that, but the coalescer can.
+;
+; void f3(float *p, float *q) {
+;   float32x2_t x;
+;   x[1] = p[3];
+;   if (q)
+;     x[0] = q[0] + q[1];
+;   else
+;     x[0] = p[2];
+;   vst1_f32(p+4, x);
+; }
+;
+; CHECK-NOT: vmov
+; CHECK-NOT: vorr
+define void @f3(float* %p, float* %q) nounwind ssp {
+entry:
+  %arrayidx = getelementptr inbounds float* %p, i32 3
+  %0 = load float* %arrayidx, align 4
+  %vecins = insertelement <2 x float> undef, float %0, i32 1
+  %tobool = icmp eq float* %q, null
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = load float* %q, align 4
+  %arrayidx2 = getelementptr inbounds float* %q, i32 1
+  %2 = load float* %arrayidx2, align 4
+  %add = fadd float %1, %2
+  %vecins3 = insertelement <2 x float> %vecins, float %add, i32 0
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %arrayidx4 = getelementptr inbounds float* %p, i32 2
+  %3 = load float* %arrayidx4, align 4
+  %vecins5 = insertelement <2 x float> %vecins, float %3, i32 0
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %x.0 = phi <2 x float> [ %vecins3, %if.then ], [ %vecins5, %if.else ]
+  %add.ptr = getelementptr inbounds float* %p, i32 4
+  %4 = bitcast float* %add.ptr to i8*
+  tail call void @llvm.arm.neon.vst1.v2f32(i8* %4, <2 x float> %x.0, i32 4)
+  ret void
+}
+
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind





More information about the llvm-commits mailing list