[llvm] r260758 - [Hexagon] Optimize stack slot spills

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 12 14:53:35 PST 2016


Author: kparzysz
Date: Fri Feb 12 16:53:35 2016
New Revision: 260758

URL: http://llvm.org/viewvc/llvm-project?rev=260758&view=rev
Log:
[Hexagon] Optimize stack slot spills

Replace spills to memory with spills to registers, if possible. This
applies mostly to predicate registers (both scalar and vector), since
they are very limited in number. A spill of a predicate register may
happen even if there is a general-purpose register available. In cases
like this the stack spill/reload may be eliminated completely.

This optimization considers all stack objects, regardless of where they
came from, and tries to match the live range of the stack slot with a
dead range of a register from an appropriate register class.
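
For a sense of the intended effect, here is a hypothetical predicate
spill (register names, the stack offset and the exact instructions are
illustrative, not output of this patch). Before the optimization, the
predicate value travels through a stack slot:

    p0 = cmp.gt(r1,r2)
    r6 = p0                // transfer predicate to a GPR
    memw(r29+#4) = r6      // spill to the stack slot
    ...
    r6 = memw(r29+#4)      // reload
    p0 = r6

If a general-purpose register is dead over the whole store-to-load
range, the store/load pair can be deleted and the value simply stays
in that register:

    p0 = cmp.gt(r1,r2)
    r6 = p0                // keep the value in the dead GPR
    ...
    p0 = r6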


Added:
    llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.h
    llvm/trunk/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll
    llvm/trunk/test/CodeGen/Hexagon/eliminate-pred-spill.ll
    llvm/trunk/test/CodeGen/Hexagon/reg-scavengebug-3.ll
    llvm/trunk/test/CodeGen/Hexagon/vec-pred-spill1.ll
Modified:
    llvm/trunk/lib/Target/Hexagon/CMakeLists.txt
    llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.h
    llvm/trunk/test/CodeGen/Hexagon/avoid-predspill.ll

Modified: llvm/trunk/lib/Target/Hexagon/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/CMakeLists.txt?rev=260758&r1=260757&r2=260758&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/Hexagon/CMakeLists.txt Fri Feb 12 16:53:35 2016
@@ -17,6 +17,7 @@ add_llvm_target(HexagonCodeGen
   HexagonAsmPrinter.cpp
   HexagonBitSimplify.cpp
   HexagonBitTracker.cpp
+  HexagonBlockRanges.cpp
   HexagonCFGOptimizer.cpp
   HexagonCommonGEP.cpp
   HexagonCopyToCombine.cpp

Added: llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.cpp?rev=260758&view=auto
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.cpp (added)
+++ llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.cpp Fri Feb 12 16:53:35 2016
@@ -0,0 +1,484 @@
+//===--- HexagonBlockRanges.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hbr"
+
+#include "HexagonBlockRanges.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <map>
+#include <vector>
+
+using namespace llvm;
+
+bool HexagonBlockRanges::IndexRange::overlaps(const IndexRange &A) const {
+  // If A contains start(), or "this" contains A.start(), then overlap.
+  IndexType S = start(), E = end(), AS = A.start(), AE = A.end();
+  if (AS == S)
+    return true;
+  bool SbAE = (S < AE) || (S == AE && A.TiedEnd);  // S-before-AE.
+  bool ASbE = (AS < E) || (AS == E && TiedEnd);    // AS-before-E.
+  if ((AS < S && SbAE) || (S < AS && ASbE))
+    return true;
+  // Otherwise no overlap.
+  return false;
+}
+
+
+bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const {
+  if (start() <= A.start()) {
+    // Treat "None" in the range end as equal to the range start.
+    IndexType E = (end() != IndexType::None) ? end() : start();
+    IndexType AE = (A.end() != IndexType::None) ? A.end() : A.start();
+    if (AE <= E)
+      return true;
+  }
+  return false;
+}
+
+
+void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) {
+  // Allow merging adjacent ranges.
+  assert(end() == A.start() || overlaps(A));
+  IndexType AS = A.start(), AE = A.end();
+  if (AS < start() || start() == IndexType::None)
+    setStart(AS);
+  if (end() < AE || end() == IndexType::None) {
+    setEnd(AE);
+    TiedEnd = A.TiedEnd;
+  } else {
+    if (end() == AE)
+      TiedEnd |= A.TiedEnd;
+  }
+  if (A.Fixed)
+    Fixed = true;
+}
+
+
+void HexagonBlockRanges::RangeList::include(const RangeList &RL) {
+  for (auto &R : RL)
+    if (std::find(begin(), end(), R) == end())
+      push_back(R);
+}
+
+
+// Merge all overlapping ranges in the list, so that all that remains
+// is a list of disjoint ranges.
+void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) {
+  if (empty())
+    return;
+
+  std::sort(begin(), end());
+  iterator Iter = begin();
+
+  while (Iter != end()-1) {
+    iterator Next = std::next(Iter);
+    // If MergeAdjacent is true, merge ranges A and B, where A.end == B.start.
+    // This allows merging dead ranges, but is not valid for live ranges.
+    bool Merge = MergeAdjacent && (Iter->end() == Next->start());
+    if (Merge || Iter->overlaps(*Next)) {
+      Iter->merge(*Next);
+      erase(Next);
+      continue;
+    }
+    ++Iter;
+  }
+}
+
+
+// Compute a range A-B and add it to the list.
+void HexagonBlockRanges::RangeList::addsub(const IndexRange &A,
+      const IndexRange &B) {
+  // Exclusion of non-overlapping ranges makes some checks simpler
+  // later in this function.
+  if (!A.overlaps(B)) {
+    // A - B = A.
+    add(A);
+    return;
+  }
+
+  IndexType AS = A.start(), AE = A.end();
+  IndexType BS = B.start(), BE = B.end();
+
+  // If AE is None, then A is included in B, since A and B overlap.
+  // The result of the subtraction is empty, so just return.
+  if (AE == IndexType::None)
+    return;
+
+  if (AS < BS) {
+    // A starts before B.
+    // AE cannot be None since A and B overlap.
+    assert(AE != IndexType::None);
+    // Add the part of A that extends on the "less" side of B.
+    add(AS, BS, A.Fixed, false);
+  }
+
+  if (BE < AE) {
+    // BE cannot be Exit here.
+    if (BE == IndexType::None)
+      add(BS, AE, A.Fixed, false);
+    else
+      add(BE, AE, A.Fixed, false);
+  }
+}
+
+
+// Subtract a given range from each element in the list.
+void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) {
+  // Cannot assume that the list is unionized (i.e. contains only non-
+  // overlapping ranges).
+  RangeList T;
+  for (iterator Next, I = begin(); I != end(); I = Next) {
+    IndexRange &Rg = *I;
+    if (Rg.overlaps(Range)) {
+      T.addsub(Rg, Range);
+      Next = this->erase(I);
+    } else {
+      Next = std::next(I);
+    }
+  }
+  include(T);
+}
+
+
+HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B)
+    : Block(B) {
+  IndexType Idx = IndexType::First;
+  First = Idx;
+  for (auto &In : B) {
+    if (In.isDebugValue())
+      continue;
+    assert(getIndex(&In) == IndexType::None && "Instruction already in map");
+    Map.insert(std::make_pair(Idx, &In));
+    ++Idx;
+  }
+  Last = B.empty() ? IndexType::None : unsigned(Idx)-1;
+}
+
+
+MachineInstr *HexagonBlockRanges::InstrIndexMap::getInstr(IndexType Idx) const {
+  auto F = Map.find(Idx);
+  return (F != Map.end()) ? F->second : nullptr;
+}
+
+
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex(
+      MachineInstr *MI) const {
+  for (auto &I : Map)
+    if (I.second == MI)
+      return I.first;
+  return IndexType::None;
+}
+
+
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex(
+      IndexType Idx) const {
+  assert (Idx != IndexType::None);
+  if (Idx == IndexType::Entry)
+    return IndexType::None;
+  if (Idx == IndexType::Exit)
+    return Last;
+  if (Idx == First)
+    return IndexType::Entry;
+  return unsigned(Idx)-1;
+}
+
+
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex(
+      IndexType Idx) const {
+  assert (Idx != IndexType::None);
+  if (Idx == IndexType::Entry)
+    return IndexType::First;
+  if (Idx == IndexType::Exit || Idx == Last)
+    return IndexType::None;
+  return unsigned(Idx)+1;
+}
+
+
+void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI,
+      MachineInstr *NewMI) {
+  for (auto &I : Map) {
+    if (I.second != OldMI)
+      continue;
+    if (NewMI != nullptr)
+      I.second = NewMI;
+    else
+      Map.erase(I.first);
+    break;
+  }
+}
+
+
+HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf)
+  : MF(mf), HST(mf.getSubtarget<HexagonSubtarget>()),
+    TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()),
+    Reserved(TRI.getReservedRegs(mf)) {
+  // Consider all non-allocatable registers as reserved.
+  for (auto I = TRI.regclass_begin(), E = TRI.regclass_end(); I != E; ++I) {
+    auto *RC = *I;
+    if (RC->isAllocatable())
+      continue;
+    for (unsigned R : *RC)
+      Reserved[R] = true;
+  }
+}
+
+
+HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns(
+      const MachineBasicBlock &B) {
+  RegisterSet LiveIns;
+  for (auto I : B.liveins())
+    if (!Reserved[I.PhysReg])
+      LiveIns.insert({I.PhysReg, 0});
+  return LiveIns;
+}
+
+
+HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs(
+      RegisterRef R, const MachineRegisterInfo &MRI,
+      const TargetRegisterInfo &TRI) {
+  RegisterSet SRs;
+
+  if (R.Sub != 0) {
+    SRs.insert(R);
+    return SRs;
+  }
+
+  if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) {
+    MCSubRegIterator I(R.Reg, &TRI);
+    if (!I.isValid())
+      SRs.insert({R.Reg, 0});
+    for (; I.isValid(); ++I)
+      SRs.insert({*I, 0});
+  } else {
+    assert(TargetRegisterInfo::isVirtualRegister(R.Reg));
+    auto &RC = *MRI.getRegClass(R.Reg);
+    unsigned PReg = *RC.begin();
+    MCSubRegIndexIterator I(PReg, &TRI);
+    if (!I.isValid())
+      SRs.insert({R.Reg, 0});
+    for (; I.isValid(); ++I)
+      SRs.insert({R.Reg, I.getSubRegIndex()});
+  }
+  return SRs;
+}
+
+
+void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
+      RegToRangeMap &LiveMap) {
+  std::map<RegisterRef,IndexType> LastDef, LastUse;
+  RegisterSet LiveOnEntry;
+  MachineBasicBlock &B = IndexMap.getBlock();
+  MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+
+  for (auto R : getLiveIns(B))
+    for (auto S : expandToSubRegs(R, MRI, TRI))
+      LiveOnEntry.insert(S);
+
+  for (auto R : LiveOnEntry)
+    LastDef[R] = IndexType::Entry;
+
+  auto closeRange = [&LastUse,&LastDef,&LiveMap] (RegisterRef R) -> void {
+    auto LD = LastDef[R], LU = LastUse[R];
+    if (LD == IndexType::None)
+      LD = IndexType::Entry;
+    if (LU == IndexType::None)
+      LU = IndexType::Exit;
+    LiveMap[R].add(LD, LU, false, false);
+    LastUse[R] = LastDef[R] = IndexType::None;
+  };
+
+  for (auto &In : B) {
+    if (In.isDebugValue())
+      continue;
+    IndexType Index = IndexMap.getIndex(&In);
+    // Process uses first.
+    for (auto &Op : In.operands()) {
+      if (!Op.isReg() || !Op.isUse() || Op.isUndef())
+        continue;
+      RegisterRef R = { Op.getReg(), Op.getSubReg() };
+      if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
+        continue;
+      bool IsKill = Op.isKill();
+      for (auto S : expandToSubRegs(R, MRI, TRI)) {
+        LastUse[S] = Index;
+        if (IsKill)
+          closeRange(S);
+      }
+    }
+    // Process defs.
+    for (auto &Op : In.operands()) {
+      if (!Op.isReg() || !Op.isDef() || Op.isUndef())
+        continue;
+      RegisterRef R = { Op.getReg(), Op.getSubReg() };
+      if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
+        continue;
+      for (auto S : expandToSubRegs(R, MRI, TRI)) {
+        if (LastDef[S] != IndexType::None)
+          closeRange(S);
+        LastDef[S] = Index;
+      }
+    }
+  }
+
+  // Collect live-on-exit.
+  RegisterSet LiveOnExit;
+  for (auto *SB : B.successors())
+    for (auto R : getLiveIns(*SB))
+      for (auto S : expandToSubRegs(R, MRI, TRI))
+        LiveOnExit.insert(S);
+
+  for (auto R : LiveOnExit)
+    LastUse[R] = IndexType::Exit;
+
+  // Process remaining registers.
+  RegisterSet Left;
+  for (auto &I : LastUse)
+    if (I.second != IndexType::None)
+      Left.insert(I.first);
+  for (auto &I : LastDef)
+    if (I.second != IndexType::None)
+      Left.insert(I.first);
+  for (auto R : Left)
+    closeRange(R);
+
+  // Finalize the live ranges.
+  for (auto &P : LiveMap)
+    P.second.unionize();
+}
+
+
+HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap(
+      InstrIndexMap &IndexMap) {
+  RegToRangeMap LiveMap;
+  DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n');
+  computeInitialLiveRanges(IndexMap, LiveMap);
+  DEBUG(dbgs() << __func__ << ": live map\n"
+               << PrintRangeMap(LiveMap, TRI) << '\n');
+  return LiveMap;
+}
+
+
+HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap(
+      InstrIndexMap &IndexMap, RegToRangeMap &LiveMap) {
+  RegToRangeMap DeadMap;
+
+  auto addDeadRanges = [&IndexMap,&LiveMap,&DeadMap] (RegisterRef R) -> void {
+    auto F = LiveMap.find(R);
+    if (F == LiveMap.end() || F->second.empty()) {
+      DeadMap[R].add(IndexType::Entry, IndexType::Exit, false, false);
+      return;
+    }
+
+    RangeList &RL = F->second;
+    RangeList::iterator A = RL.begin(), Z = RL.end()-1;
+
+    // Try to create the initial range.
+    if (A->start() != IndexType::Entry) {
+      IndexType DE = IndexMap.getPrevIndex(A->start());
+      if (DE != IndexType::Entry)
+        DeadMap[R].add(IndexType::Entry, DE, false, false);
+    }
+
+    while (A != Z) {
+      // Creating a dead range that follows A.  Pay attention to empty
+      // ranges (i.e. those ending with "None").
+      IndexType AE = (A->end() == IndexType::None) ? A->start() : A->end();
+      IndexType DS = IndexMap.getNextIndex(AE);
+      ++A;
+      IndexType DE = IndexMap.getPrevIndex(A->start());
+      if (DS < DE)
+        DeadMap[R].add(DS, DE, false, false);
+    }
+
+    // Try to create the final range.
+    if (Z->end() != IndexType::Exit) {
+      IndexType ZE = (Z->end() == IndexType::None) ? Z->start() : Z->end();
+      IndexType DS = IndexMap.getNextIndex(ZE);
+      if (DS < IndexType::Exit)
+        DeadMap[R].add(DS, IndexType::Exit, false, false);
+    }
+  };
+
+  MachineFunction &MF = *IndexMap.getBlock().getParent();
+  auto &MRI = MF.getRegInfo();
+  unsigned NumRegs = TRI.getNumRegs();
+  BitVector Visited(NumRegs);
+  for (unsigned R = 1; R < NumRegs; ++R) {
+    for (auto S : expandToSubRegs({R,0}, MRI, TRI)) {
+      if (Reserved[S.Reg] || Visited[S.Reg])
+        continue;
+      addDeadRanges(S);
+      Visited[S.Reg] = true;
+    }
+  }
+  for (auto &P : LiveMap)
+    if (TargetRegisterInfo::isVirtualRegister(P.first.Reg))
+      addDeadRanges(P.first);
+
+  DEBUG(dbgs() << __func__ << ": dead map\n"
+               << PrintRangeMap(DeadMap, TRI) << '\n');
+  return DeadMap;
+}
+
+
+raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx) {
+  if (Idx == HexagonBlockRanges::IndexType::None)
+    return OS << '-';
+  if (Idx == HexagonBlockRanges::IndexType::Entry)
+    return OS << 'n';
+  if (Idx == HexagonBlockRanges::IndexType::Exit)
+    return OS << 'x';
+  return OS << unsigned(Idx)-HexagonBlockRanges::IndexType::First+1;
+}
+
+// Print an IndexRange as [start:end]; '}' marks a tied end, '!' a fixed range.
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::IndexRange &IR) {
+  OS << '[' << IR.start() << ':' << IR.end() << (IR.TiedEnd ? '}' : ']');
+  if (IR.Fixed)
+    OS << '!';
+  return OS;
+}
+
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::RangeList &RL) {
+  for (auto &R : RL)
+    OS << R << " ";
+  return OS;
+}
+
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::InstrIndexMap &M) {
+  for (auto &In : M.Block) {
+    HexagonBlockRanges::IndexType Idx = M.getIndex(&In);
+    OS << Idx << (Idx == M.Last ? ". " : "  ") << In;
+  }
+  return OS;
+}
+
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::PrintRangeMap &P) {
+  for (auto &I : P.Map) {
+    const HexagonBlockRanges::RangeList &RL = I.second;
+    OS << PrintReg(I.first.Reg, &P.TRI, I.first.Sub) << " -> " << RL << "\n";
+  }
+  return OS;
+}

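As a worked example of the range arithmetic above (a hypothetical
sketch, not code from this patch; raw IndexType values are abstract
positions, with 11 == First printing as "1"):

    #include "HexagonBlockRanges.h"

    using IT = HexagonBlockRanges::IndexType;

    void rangeDemo() {
      HexagonBlockRanges::RangeList RL;
      RL.add(IT(11), IT(13), false, false);  // prints as [1:3]
      RL.add(IT(13), IT(15), false, false);  // prints as [3:5], adjacent
      // Dead ranges may merge across a shared endpoint:
      RL.unionize(/*MergeAdjacent=*/true);   // RL is now { [1:5] }
      // Subtracting [2:4] splits the merged range again:
      RL.subtract(HexagonBlockRanges::IndexRange(IT(12), IT(14)));
      // RL is now { [1:2] [4:5] }
    }

With MergeAdjacent left at its default of false, as the live-range
computation uses it, the two adjacent ranges would stay separate, which
matches the comment in unionize(): merging across a shared index is
only valid for dead ranges.
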
Added: llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.h?rev=260758&view=auto
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.h (added)
+++ llvm/trunk/lib/Target/Hexagon/HexagonBlockRanges.h Fri Feb 12 16:53:35 2016
@@ -0,0 +1,240 @@
+//===--- HexagonBlockRanges.h ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef HEXAGON_BLOCK_RANGES_H
+#define HEXAGON_BLOCK_RANGES_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCRegisterInfo.h"  // For MCPhysReg.
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+  class Function;
+  class HexagonSubtarget;
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class MCInstrDesc;
+  class raw_ostream;
+  class TargetInstrInfo;
+  class TargetRegisterClass;
+  class TargetRegisterInfo;
+  class Type;
+}
+
+using namespace llvm;
+
+struct HexagonBlockRanges {
+  HexagonBlockRanges(MachineFunction &MF);
+
+  struct RegisterRef {
+    unsigned Reg, Sub;
+    bool operator<(RegisterRef R) const {
+      return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
+    }
+  };
+  typedef std::set<RegisterRef> RegisterSet;
+
+  // This is to represent an "index", which is an abstraction of a position
+  // of an instruction within a basic block.
+  class IndexType {
+  public:
+    enum : unsigned {
+      None  = 0,
+      Entry = 1,
+      Exit  = 2,
+      First = 11  // First instruction index; lower values are reserved.
+    };
+    static bool isInstr(IndexType X) { return X.Index >= First; }
+
+    IndexType() : Index(None) {}
+    IndexType(unsigned Idx) : Index(Idx) {}
+    operator unsigned() const;
+    bool operator== (unsigned x) const;
+    bool operator== (IndexType Idx) const;
+    bool operator!= (unsigned x) const;
+    bool operator!= (IndexType Idx) const;
+    IndexType operator++ ();
+    bool operator< (unsigned Idx) const;
+    bool operator< (IndexType Idx) const;
+    bool operator<= (IndexType Idx) const;
+
+  private:
+    bool operator>  (IndexType Idx) const;
+    bool operator>= (IndexType Idx) const;
+
+    unsigned Index;
+  };
+
+  // A range of indices, essentially a representation of a live range.
+  // This is also used to represent "dead ranges", i.e. ranges where a
+  // register is dead.
+  class IndexRange : public std::pair<IndexType,IndexType> {
+  public:
+    IndexRange() : Fixed(false), TiedEnd(false) {}
+    IndexRange(IndexType Start, IndexType End, bool F = false, bool T = false)
+      : std::pair<IndexType,IndexType>(Start, End), Fixed(F), TiedEnd(T) {}
+    IndexType start() const { return first; }
+    IndexType end() const   { return second; }
+
+    bool operator< (const IndexRange &A) const {
+      return start() < A.start();
+    }
+    bool overlaps(const IndexRange &A) const;
+    bool contains(const IndexRange &A) const;
+    void merge(const IndexRange &A);
+
+    bool Fixed;      // Can be renamed?  "Fixed" means "no".
+    bool TiedEnd;    // The end is not a use, but a dead def tied to a use.
+
+  private:
+    void setStart(const IndexType &S) { first = S; }
+    void setEnd(const IndexType &E)   { second = E; }
+  };
+
+  // A list of index ranges. This represents liveness of a register
+  // in a basic block.
+  class RangeList : public std::vector<IndexRange> {
+  public:
+    void add(IndexType Start, IndexType End, bool Fixed, bool TiedEnd) {
+      push_back(IndexRange(Start, End, Fixed, TiedEnd));
+    }
+    void add(const IndexRange &Range) {
+      push_back(Range);
+    }
+    void include(const RangeList &RL);
+    void unionize(bool MergeAdjacent = false);
+    void subtract(const IndexRange &Range);
+
+  private:
+    void addsub(const IndexRange &A, const IndexRange &B);
+  };
+
+  class InstrIndexMap {
+  public:
+    InstrIndexMap(MachineBasicBlock &B);
+    MachineInstr *getInstr(IndexType Idx) const;
+    IndexType getIndex(MachineInstr *MI) const;
+    MachineBasicBlock &getBlock() const { return Block; }
+    IndexType getPrevIndex(IndexType Idx) const;
+    IndexType getNextIndex(IndexType Idx) const;
+    void replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI);
+
+    friend raw_ostream &operator<< (raw_ostream &OS, const InstrIndexMap &Map);
+    IndexType First, Last;
+
+  private:
+    MachineBasicBlock &Block;
+    std::map<IndexType,MachineInstr*> Map;
+  };
+
+  typedef std::map<RegisterRef,RangeList> RegToRangeMap;
+  RegToRangeMap computeLiveMap(InstrIndexMap &IndexMap);
+  RegToRangeMap computeDeadMap(InstrIndexMap &IndexMap, RegToRangeMap &LiveMap);
+  static RegisterSet expandToSubRegs(RegisterRef R,
+      const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI);
+
+  struct PrintRangeMap {
+    PrintRangeMap(const RegToRangeMap &M, const TargetRegisterInfo &I)
+        : Map(M), TRI(I) {}
+
+    friend raw_ostream &operator<< (raw_ostream &OS, const PrintRangeMap &P);
+  private:
+    const RegToRangeMap &Map;
+    const TargetRegisterInfo &TRI;
+  };
+
+private:
+  RegisterSet getLiveIns(const MachineBasicBlock &B);
+
+  void computeInitialLiveRanges(InstrIndexMap &IndexMap,
+      RegToRangeMap &LiveMap);
+
+  MachineFunction &MF;
+  const HexagonSubtarget &HST;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+  BitVector Reserved;
+};
+
+
+inline HexagonBlockRanges::IndexType::operator unsigned() const {
+  assert(Index >= First);
+  return Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator== (unsigned x) const {
+  return Index == x;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator== (IndexType Idx) const {
+  return Index == Idx.Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator!= (unsigned x) const {
+  return Index != x;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator!= (IndexType Idx) const {
+  return Index != Idx.Index;
+}
+
+inline
+HexagonBlockRanges::IndexType HexagonBlockRanges::IndexType::operator++ () {
+  assert(Index != None);
+  assert(Index != Exit);
+  if (Index == Entry)
+    Index = First;
+  else
+    ++Index;
+  return *this;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator< (unsigned Idx) const {
+  return operator< (IndexType(Idx));
+}
+
+inline bool HexagonBlockRanges::IndexType::operator< (IndexType Idx) const {
+  // !(x < x).
+  if (Index == Idx.Index)
+    return false;
+  // !(None < x) for all x.
+  // !(x < None) for all x.
+  if (Index == None || Idx.Index == None)
+    return false;
+  // !(Exit < x) for all x.
+  // !(x < Entry) for all x.
+  if (Index == Exit || Idx.Index == Entry)
+    return false;
+  // Entry < x for all x != Entry.
+  // x < Exit for all x != Exit.
+  if (Index == Entry || Idx.Index == Exit)
+    return true;
+
+  return Index < Idx.Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator<= (IndexType Idx) const {
+  return operator==(Idx) || operator<(Idx);
+}
+
+
+raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx);
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::IndexRange &IR);
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::RangeList &RL);
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::InstrIndexMap &M);
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::PrintRangeMap &P);
+
+#endif

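The comparison operators implement a deliberately partial order: Entry
precedes every instruction index, Exit follows every one, and None is
unordered with respect to everything. A quick sketch of the resulting
comparisons (hypothetical, using the enum values from this header):

    #include <cassert>
    #include "HexagonBlockRanges.h"

    using IT = HexagonBlockRanges::IndexType;

    void orderDemo() {
      assert(IT(IT::Entry) < IT(IT::First));    // Entry before instructions
      assert(IT(IT::First) < IT(IT::Exit));     // Exit after instructions
      assert(!(IT(IT::None) < IT(IT::Entry)));  // None compares false...
      assert(!(IT(IT::Entry) < IT(IT::None)));  // ...in both directions
      assert(!(IT(IT::Exit) < IT(IT::Exit)));   // irreflexive
    }
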
Modified: llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.cpp?rev=260758&r1=260757&r2=260758&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.cpp Fri Feb 12 16:53:35 2016
@@ -10,6 +10,7 @@
 
 #define DEBUG_TYPE "hexagon-pei"
 
+#include "HexagonBlockRanges.h"
 #include "HexagonFrameLowering.h"
 #include "HexagonInstrInfo.h"
 #include "HexagonMachineFunctionInfo.h"
@@ -147,6 +148,9 @@ static cl::opt<unsigned> ShrinkLimit("sh
 static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
     cl::Hidden, cl::desc("Use allocframe more conservatively"));
 
+static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
+    cl::init(true), cl::desc("Optimize spill slots"));
+
 
 namespace llvm {
   void initializeHexagonCallFrameInformationPass(PassRegistry&);
@@ -1046,13 +1050,13 @@ static bool needToReserveScavengingSpill
   // Check for an unused caller-saved register.
   for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
     MCPhysReg FreeReg = *CallerSavedRegs;
-    if (!MRI.reg_nodbg_empty(FreeReg))
+    if (MRI.isPhysRegUsed(FreeReg))
       continue;
 
     // Check aliased register usage.
     bool IsCurrentRegUsed = false;
     for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
-      if (!MRI.reg_nodbg_empty(*AI)) {
+      if (MRI.isPhysRegUsed(*AI)) {
         IsCurrentRegUsed = true;
         break;
       }
@@ -1634,7 +1638,8 @@ void HexagonFrameLowering::determineCall
   // Replace predicate register pseudo spill code.
   SmallVector<unsigned,8> NewRegs;
   expandSpillMacros(MF, NewRegs);
-
+  if (OptimizeSpillSlots)
+    optimizeSpillSlots(MF, NewRegs);
 
  // We need to reserve a spill slot if scavenging could potentially require
   // spilling a scavenged register.
@@ -1665,6 +1670,354 @@ void HexagonFrameLowering::determineCall
 }
 
 
+unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
+      HexagonBlockRanges::IndexRange &FIR,
+      HexagonBlockRanges::InstrIndexMap &IndexMap,
+      HexagonBlockRanges::RegToRangeMap &DeadMap,
+      const TargetRegisterClass *RC) const {
+  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+  auto &MRI = MF.getRegInfo();
+
+  auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool {
+    auto F = DeadMap.find({Reg,0});
+    if (F == DeadMap.end())
+      return false;
+    for (auto &DR : F->second)
+      if (DR.contains(FIR))
+        return true;
+    return false;
+  };
+
+  for (unsigned Reg : RC->getRawAllocationOrder(MF)) {
+    bool Dead = true;
+    for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
+      if (isDead(R.Reg))
+        continue;
+      Dead = false;
+      break;
+    }
+    if (Dead)
+      return Reg;
+  }
+  return 0;
+}
+
+void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
+      SmallVectorImpl<unsigned> &VRegs) const {
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  auto &HII = *HST.getInstrInfo();
+  auto &HRI = *HST.getRegisterInfo();
+  auto &MRI = MF.getRegInfo();
+  HexagonBlockRanges HBR(MF);
+
+  typedef std::map<MachineBasicBlock*,HexagonBlockRanges::InstrIndexMap>
+      BlockIndexMap;
+  typedef std::map<MachineBasicBlock*,HexagonBlockRanges::RangeList>
+      BlockRangeMap;
+  typedef HexagonBlockRanges::IndexType IndexType;
+
+  struct SlotInfo {
+    BlockRangeMap Map;
+    unsigned Size = 0;
+    const TargetRegisterClass *RC = nullptr;
+  };
+
+  BlockIndexMap BlockIndexes;
+  SmallSet<int,4> BadFIs;
+  std::map<int,SlotInfo> FIRangeMap;
+
+  auto getRegClass = [&MRI,&HRI] (HexagonBlockRanges::RegisterRef R)
+        -> const TargetRegisterClass* {
+    if (TargetRegisterInfo::isPhysicalRegister(R.Reg))
+      assert(R.Sub == 0);
+    if (TargetRegisterInfo::isVirtualRegister(R.Reg)) {
+      auto *RCR = MRI.getRegClass(R.Reg);
+      if (R.Sub == 0)
+        return RCR;
+      unsigned PR = *RCR->begin();
+      R.Reg = HRI.getSubReg(PR, R.Sub);
+    }
+    return HRI.getMinimalPhysRegClass(R.Reg);
+  };
+  // Accumulate register classes: get a common class for a pre-existing
+  // class HaveRC and a new class NewRC. Return nullptr if a common class
+  // cannot be found, otherwise return the resulting class. If HaveRC is
+  // nullptr, assume that it is still unset.
+  auto getCommonRC = [&HRI] (const TargetRegisterClass *HaveRC,
+                             const TargetRegisterClass *NewRC)
+        -> const TargetRegisterClass* {
+    if (HaveRC == nullptr || HaveRC == NewRC)
+      return NewRC;
+    // Different classes, both non-null. Pick the more general one.
+    if (HaveRC->hasSubClassEq(NewRC))
+      return HaveRC;
+    if (NewRC->hasSubClassEq(HaveRC))
+      return NewRC;
+    return nullptr;
+  };
+
+  // Scan all blocks in the function. Check all occurrences of frame indexes,
+  // and collect relevant information.
+  for (auto &B : MF) {
+    std::map<int,IndexType> LastStore, LastLoad;
+    auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
+    auto &IndexMap = P.first->second;
+    DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n"
+                 << IndexMap << '\n');
+
+    for (auto &In : B) {
+      int LFI, SFI;
+      bool Load = HII.isLoadFromStackSlot(&In, LFI) && !HII.isPredicated(&In);
+      bool Store = HII.isStoreToStackSlot(&In, SFI) && !HII.isPredicated(&In);
+      if (Load && Store) {
+        // If it's both a load and a store, then we won't handle it.
+        BadFIs.insert(LFI);
+        BadFIs.insert(SFI);
+        continue;
+      }
+      // Check for register classes of the register used as the source for
+      // the store, and the register used as the destination for the load.
+      // Also, only accept base+imm_offset addressing modes. Other addressing
+      // modes can have side-effects (post-increments, etc.). For stack
+      // slots they are very unlikely, so there is not much loss due to
+      // this restriction.
+      if (Load || Store) {
+        int TFI = Load ? LFI : SFI;
+        unsigned AM = HII.getAddrMode(&In);
+        SlotInfo &SI = FIRangeMap[TFI];
+        bool Bad = (AM != HexagonII::BaseImmOffset);
+        if (!Bad) {
+          // If the addressing mode is ok, check the register class.
+          const TargetRegisterClass *RC = nullptr;
+          if (Load) {
+            MachineOperand &DataOp = In.getOperand(0);
+            RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
+          } else {
+            MachineOperand &DataOp = In.getOperand(2);
+            RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
+          }
+          RC = getCommonRC(SI.RC, RC);
+          if (RC == nullptr)
+            Bad = true;
+          else
+            SI.RC = RC;
+        }
+        if (!Bad) {
+          // Check sizes.
+          unsigned S = (1U << (HII.getMemAccessSize(&In) - 1));
+          if (SI.Size != 0 && SI.Size != S)
+            Bad = true;
+          else
+            SI.Size = S;
+        }
+        if (Bad)
+          BadFIs.insert(TFI);
+      }
+
+      // Locate uses of frame indices.
+      for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {
+        const MachineOperand &Op = In.getOperand(i);
+        if (!Op.isFI())
+          continue;
+        int FI = Op.getIndex();
+        // Make sure that the following operand is an immediate and that
+        // it is 0. This is the offset in the stack object.
+        if (i+1 >= n || !In.getOperand(i+1).isImm() ||
+            In.getOperand(i+1).getImm() != 0)
+          BadFIs.insert(FI);
+        if (BadFIs.count(FI))
+          continue;
+
+        IndexType Index = IndexMap.getIndex(&In);
+        if (Load) {
+          if (LastStore[FI] == IndexType::None)
+            LastStore[FI] = IndexType::Entry;
+          LastLoad[FI] = Index;
+        } else if (Store) {
+          HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
+          if (LastStore[FI] != IndexType::None)
+            RL.add(LastStore[FI], LastLoad[FI], false, false);
+          else if (LastLoad[FI] != IndexType::None)
+            RL.add(IndexType::Entry, LastLoad[FI], false, false);
+          LastLoad[FI] = IndexType::None;
+          LastStore[FI] = Index;
+        } else {
+          BadFIs.insert(FI);
+        }
+      }
+    }
+
+    for (auto &I : LastLoad) {
+      IndexType LL = I.second;
+      if (LL == IndexType::None)
+        continue;
+      auto &RL = FIRangeMap[I.first].Map[&B];
+      IndexType &LS = LastStore[I.first];
+      if (LS != IndexType::None)
+        RL.add(LS, LL, false, false);
+      else
+        RL.add(IndexType::Entry, LL, false, false);
+      LS = IndexType::None;
+    }
+    for (auto &I : LastStore) {
+      IndexType LS = I.second;
+      if (LS == IndexType::None)
+        continue;
+      auto &RL = FIRangeMap[I.first].Map[&B];
+      RL.add(LS, IndexType::None, false, false);
+    }
+  }
+
+  DEBUG({
+    for (auto &P : FIRangeMap) {
+      dbgs() << "fi#" << P.first;
+      if (BadFIs.count(P.first))
+        dbgs() << " (bad)";
+      dbgs() << "  RC: ";
+      if (P.second.RC != nullptr)
+        dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
+      else
+        dbgs() << "<null>\n";
+      for (auto &R : P.second.Map)
+        dbgs() << "  BB#" << R.first->getNumber() << " { " << R.second << "}\n";
+    }
+  });
+
+  // When a slot is loaded from in a block without being stored to in the
+  // same block, it is live-on-entry to this block. To avoid CFG analysis,
+  // consider this slot to be live-on-exit from all blocks.
+  SmallSet<int,4> LoxFIs;
+
+  std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
+
+  for (auto &P : FIRangeMap) {
+    // P = pair(FI, map: BB->RangeList)
+    if (BadFIs.count(P.first))
+      continue;
+    for (auto &B : MF) {
+      auto F = P.second.Map.find(&B);
+      // F = pair(BB, RangeList)
+      if (F == P.second.Map.end() || F->second.empty())
+        continue;
+      HexagonBlockRanges::IndexRange &IR = F->second.front();
+      if (IR.start() == IndexType::Entry)
+        LoxFIs.insert(P.first);
+      BlockFIMap[&B].push_back(P.first);
+    }
+  }
+
+  DEBUG({
+    dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
+    for (auto &P : BlockFIMap) {
+      auto &FIs = P.second;
+      if (FIs.empty())
+        continue;
+      dbgs() << "  BB#" << P.first->getNumber() << ": {";
+      for (auto I : FIs) {
+        dbgs() << " fi#" << I;
+        if (LoxFIs.count(I))
+          dbgs() << '*';
+      }
+      dbgs() << " }\n";
+    }
+  });
+
+  // Eliminate the loads first; once all loads from a slot have been
+  // eliminated, all stores to it can be eliminated as well.
+  for (auto &B : MF) {
+    auto F = BlockIndexes.find(&B);
+    assert(F != BlockIndexes.end());
+    HexagonBlockRanges::InstrIndexMap &IM = F->second;
+    HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
+    HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
+    DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n"
+                 << HexagonBlockRanges::PrintRangeMap(DM, HRI));
+
+    for (auto FI : BlockFIMap[&B]) {
+      if (BadFIs.count(FI))
+        continue;
+      DEBUG(dbgs() << "Working on fi#" << FI << '\n');
+      HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
+      for (auto &Range : RL) {
+        DEBUG(dbgs() << "--Examining range:" << Range << '\n');
+        if (!IndexType::isInstr(Range.start()) ||
+            !IndexType::isInstr(Range.end()))
+          continue;
+        MachineInstr *SI = IM.getInstr(Range.start());
+        MachineInstr *EI = IM.getInstr(Range.end());
+        assert(SI->mayStore() && "Unexpected start instruction");
+        assert(EI->mayLoad() && "Unexpected end instruction");
+        MachineOperand &SrcOp = SI->getOperand(2);
+
+        HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
+                                                  SrcOp.getSubReg() };
+        auto *RC = getRegClass({SrcOp.getReg(), SrcOp.getSubReg()});
+        // The this-> is needed to unconfuse MSVC.
+        unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
+        DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n');
+        if (FoundR == 0)
+          continue;
+
+        // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
+        MachineBasicBlock::iterator StartIt = SI, NextIt;
+        MachineInstr *CopyIn = nullptr;
+        if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
+          DebugLoc DL = SI->getDebugLoc();
+          CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
+                      .addOperand(SrcOp);
+        }
+
+        ++StartIt;
+        // Check if this is a last store and the FI is live-on-exit.
+        if (LoxFIs.count(FI) && (&Range == &RL.back())) {
+          // Update store's source register.
+          if (unsigned SR = SrcOp.getSubReg())
+            SrcOp.setReg(HRI.getSubReg(FoundR, SR));
+          else
+            SrcOp.setReg(FoundR);
+          SrcOp.setSubReg(0);
+          // We are keeping this register live.
+          SrcOp.setIsKill(false);
+        } else {
+          B.erase(SI);
+          IM.replaceInstr(SI, CopyIn);
+        }
+
+        auto EndIt = std::next(MachineBasicBlock::iterator(EI));
+        for (auto It = StartIt; It != EndIt; It = NextIt) {
+          MachineInstr *MI = &*It;
+          NextIt = std::next(It);
+          int TFI;
+          if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
+            continue;
+          unsigned DstR = MI->getOperand(0).getReg();
+          assert(MI->getOperand(0).getSubReg() == 0);
+          MachineInstr *CopyOut = nullptr;
+          if (DstR != FoundR) {
+            DebugLoc DL = MI->getDebugLoc();
+            unsigned MemSize = (1U << (HII.getMemAccessSize(MI) - 1));
+            assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
+            unsigned CopyOpc = TargetOpcode::COPY;
+            if (HII.isSignExtendingLoad(MI))
+              CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
+            else if (HII.isZeroExtendingLoad(MI))
+              CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
+            CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
+                        .addReg(FoundR, getKillRegState(MI == EI));
+          }
+          IM.replaceInstr(MI, CopyOut);
+          B.erase(It);
+        }
+
+        // Update the dead map.
+        HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
+        for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
+          DM[RR].subtract(Range);
+      } // for Range in range list
+    }
+  }
+}
+
+
 void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
       const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
   MachineBasicBlock &MB = *AI->getParent();

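The net effect of optimizeSpillSlots on a single store-to-load range,
sketched in illustrative pseudo-assembly (slot and register names are
made up): a register that is dead over the same indices is found, a
copy-in is inserted at the store (which is then erased, unless the slot
is live-on-exit), and each load in the range becomes a copy-out; for a
sign- or zero-extending load the copy becomes the corresponding
A2_sxtb/A2_sxth or A2_zxtb/A2_zxth.

    // before: fi#0 carries the value from the store to the last load
    memw(fi#0) = r5
    ...
    r6 = memw(fi#0)

    // after: r7, found dead over the whole range, replaces the slot
    r7 = r5              // copy-in at the store point
    ...
    r6 = r7              // copy-out replaces the load

The rewrite is on by default; it can be disabled for triage via the
hidden flag added above, e.g. "llc -march=hexagon
-hexagon-opt-spill=false" (a usage sketch).
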
Modified: llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.h?rev=260758&r1=260757&r2=260758&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.h Fri Feb 12 16:53:35 2016
@@ -11,6 +11,7 @@
 #define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
 
 #include "Hexagon.h"
+#include "HexagonBlockRanges.h"
 #include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
@@ -124,6 +125,13 @@ private:
   bool expandSpillMacros(MachineFunction &MF,
       SmallVectorImpl<unsigned> &NewRegs) const;
 
+  unsigned findPhysReg(MachineFunction &MF, HexagonBlockRanges::IndexRange &FIR,
+      HexagonBlockRanges::InstrIndexMap &IndexMap,
+      HexagonBlockRanges::RegToRangeMap &DeadMap,
+      const TargetRegisterClass *RC) const;
+  void optimizeSpillSlots(MachineFunction &MF,
+      SmallVectorImpl<unsigned> &VRegs) const;
+
   void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB,
       MachineBasicBlock *&EpilogB) const;
 

Added: llvm/trunk/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll?rev=260758&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll Fri Feb 12 16:53:35 2016
@@ -0,0 +1,49 @@
+; Check that a callee-saved register will be saved correctly if
+; the predicate-to-GPR spilling code uses it.
+;
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; We expect to spill p0 into a general-purpose register and keep it there,
+; without adding an extra spill of that register.
+;
+; CHECK: PredSpill:
+; CHECK: memd(r29{{.*}}) = r17:16
+; CHECK-DAG: r{{[0-9]+}} = p0
+; CHECK-DAG: p0 = r{{[0-9]+}}
+; CHECK-NOT: = memw(r29
+;
+
+define void @PredSpill() {
+entry:
+  br i1 undef, label %if.then, label %if.else.14
+
+if.then:                                          ; preds = %entry
+  br i1 undef, label %if.end.57, label %if.else
+
+if.else:                                          ; preds = %if.then
+  unreachable
+
+if.else.14:                                       ; preds = %entry
+  br i1 undef, label %if.then.17, label %if.end.57
+
+if.then.17:                                       ; preds = %if.else.14
+  br i1 undef, label %if.end.57, label %if.then.20
+
+if.then.20:                                       ; preds = %if.then.17
+  %call21 = tail call i32 @myfun()
+  %tobool22 = icmp eq i32 %call21, 0
+  %0 = tail call i32 @myfun()
+  br i1 %tobool22, label %if.else.42, label %if.then.23
+
+if.then.23:                                       ; preds = %if.then.20
+  unreachable
+
+if.else.42:                                       ; preds = %if.then.20
+  ret void
+
+if.end.57:                                        ; preds = %if.then.17, %if.else.14, %if.then
+  ret void
+}
+
+declare i32 @myfun()
+

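The new tests can be run individually with llvm-lit (a usage sketch;
assumes a configured build directory):

    bin/llvm-lit -v test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll \
                    test/CodeGen/Hexagon/eliminate-pred-spill.ll
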
Modified: llvm/trunk/test/CodeGen/Hexagon/avoid-predspill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/avoid-predspill.ll?rev=260758&r1=260757&r2=260758&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/avoid-predspill.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/avoid-predspill.ll Fri Feb 12 16:53:35 2016
@@ -1,6 +1,3 @@
-; This functionality will be restored shortly.
-; XFAIL: *
-
 ; RUN: llc -march=hexagon -O2 < %s | FileCheck %s
 ;
 ; This checks that predicate registers are moved to GPRs instead of spilling

Added: llvm/trunk/test/CodeGen/Hexagon/eliminate-pred-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/eliminate-pred-spill.ll?rev=260758&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/eliminate-pred-spill.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/eliminate-pred-spill.ll Fri Feb 12 16:53:35 2016
@@ -0,0 +1,144 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx-double \
+; RUN:     -hexagon-bit=0 < %s | FileCheck %s
+
+; This spill should be eliminated.
+; CHECK-NOT: vmem(r29+#6)
+
+define void @test(i8* noalias nocapture %key, i8* noalias nocapture %data1) #0 {
+entry:
+  %0 = bitcast i8* %key to <32 x i32>*
+  %1 = bitcast i8* %data1 to <32 x i32>*
+  br label %for.body
+
+for.body:
+  %pkey.0542 = phi <32 x i32>* [ %0, %entry ], [ null, %for.body ]
+  %pdata0.0541 = phi <32 x i32>* [ null, %entry ], [ %add.ptr48, %for.body ]
+  %pdata1.0540 = phi <32 x i32>* [ %1, %entry ], [ %add.ptr49, %for.body ]
+  %dAccum0.0539 = phi <64 x i32> [ undef, %entry ], [ %86, %for.body ]
+  %2 = load <32 x i32>, <32 x i32>* %pkey.0542, align 128
+  %3 = load <32 x i32>, <32 x i32>* %pdata0.0541, align 128
+  %4 = load <32 x i32>, <32 x i32>* undef, align 128
+  %arrayidx4 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 2
+  %5 = load <32 x i32>, <32 x i32>* %arrayidx4, align 128
+  %arrayidx5 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 2
+  %6 = load <32 x i32>, <32 x i32>* %arrayidx5, align 128
+  %7 = load <32 x i32>, <32 x i32>* null, align 128
+  %8 = load <32 x i32>, <32 x i32>* undef, align 128
+  %9 = load <32 x i32>, <32 x i32>* null, align 128
+  %arrayidx9 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 3
+  %arrayidx10 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 6
+  %10 = load <32 x i32>, <32 x i32>* %arrayidx10, align 128
+  %arrayidx12 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 4
+  %11 = load <32 x i32>, <32 x i32>* %arrayidx12, align 128
+  %arrayidx13 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 8
+  %arrayidx14 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 8
+  %12 = load <32 x i32>, <32 x i32>* %arrayidx14, align 128
+  %arrayidx15 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 5
+  %13 = load <32 x i32>, <32 x i32>* %arrayidx15, align 128
+  %arrayidx16 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 10
+  %arrayidx17 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 10
+  %14 = load <32 x i32>, <32 x i32>* %arrayidx17, align 128
+  %arrayidx18 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 6
+  %15 = load <32 x i32>, <32 x i32>* %arrayidx18, align 128
+  %arrayidx19 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 12
+  %16 = load <32 x i32>, <32 x i32>* %arrayidx19, align 128
+  %arrayidx20 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 12
+  %17 = load <32 x i32>, <32 x i32>* %arrayidx20, align 128
+  %arrayidx22 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 14
+  %18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128
+  %arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14
+  %19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128
+  %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
+  %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2)
+  %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11)
+  %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3)
+  %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef)
+  %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
+  %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7)
+  %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15)
+  %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8)
+  %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16)
+  %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9)
+  %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17)
+  %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
+  %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4)
+  %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13)
+  %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5)
+  %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef)
+  %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6)
+  %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14)
+  %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
+  %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
+  %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
+  %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
+  %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
+  %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
+  %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
+  %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21)
+  %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26)
+  %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23)
+  %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28)
+  %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24)
+  %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30)
+  %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
+  %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22)
+  %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27)
+  %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef)
+  %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31)
+  %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
+  %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33)
+  %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39)
+  %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35)
+  %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37)
+  %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
+  %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36)
+  %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44)
+  %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
+  %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46)
+  %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48)
+  %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50)
+  %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
+  %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
+  %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
+  %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55)
+  %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
+  %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64)
+  %75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67)
+  %76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef)
+  %77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72)
+  %78 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %74, <32 x i32> zeroinitializer)
+  %79 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %dAccum0.0539, <32 x i32> %75, i32 65537)
+  %80 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %79, <32 x i32> zeroinitializer, i32 65537)
+  %81 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %80, <32 x i32> zeroinitializer, i32 65537)
+  %82 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %81, <32 x i32> %76, i32 65537)
+  %83 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %82, <32 x i32> %77, i32 65537)
+  %84 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %83, <32 x i32> zeroinitializer, i32 65537)
+  %85 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %84, <32 x i32> undef, i32 65537)
+  %86 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %85, <32 x i32> %78, i32 65537)
+  store <32 x i32> %66, <32 x i32>* %pkey.0542, align 128
+  store <32 x i32> %75, <32 x i32>* %pdata0.0541, align 128
+  store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx4, align 128
+  store <32 x i32> zeroinitializer, <32 x i32>* undef, align 128
+  store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx20, align 128
+  store <32 x i32> zeroinitializer, <32 x i32>* null, align 128
+  %add.ptr48 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 16
+  %add.ptr49 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 16
+  br i1 false, label %for.end, label %for.body
+
+for.end:
+  %87 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %86)
+  ret void
+}
+
+declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
+
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+
+declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1
+
+declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1
+
+declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }

Added: llvm/trunk/test/CodeGen/Hexagon/reg-scavengebug-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/reg-scavengebug-3.ll?rev=260758&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/reg-scavengebug-3.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/reg-scavengebug-3.ll Fri Feb 12 16:53:35 2016
@@ -0,0 +1,80 @@
+; RUN: llc -O0 -march=hexagon -mcpu=hexagonv60 < %s | FileCheck %s
+
+; CHECK: vmem
+
+target triple = "hexagon"
+
+ at vecpreds = external global [15 x <16 x i32>], align 64
+ at vectors = external global [15 x <16 x i32>], align 64
+ at vector_pairs = external global [15 x <32 x i32>], align 128
+ at .str1 = external hidden unnamed_addr constant [20 x i8], align 1
+ at .str2 = external hidden unnamed_addr constant [43 x i8], align 1
+ at Q6VecPredResult = external global <16 x i32>, align 64
+ at .str52 = external hidden unnamed_addr constant [57 x i8], align 1
+ at .str54 = external hidden unnamed_addr constant [59 x i8], align 1
+ at VectorResult = external global <16 x i32>, align 64
+ at .str243 = external hidden unnamed_addr constant [60 x i8], align 1
+ at .str251 = external hidden unnamed_addr constant [77 x i8], align 1
+ at .str290 = external hidden unnamed_addr constant [65 x i8], align 1
+ at VectorPairResult = external global <32 x i32>, align 128
+
+; Function Attrs: nounwind
+declare void @print_vector(i32, i8*) #0
+
+; Function Attrs: nounwind
+declare i32 @printf(i8*, ...) #0
+
+; Function Attrs: nounwind
+declare void @print_vecpred(i32, i8*) #0
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+
+; Function Attrs: nounwind
+declare void @init_vectors() #0
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
+
+; Function Attrs: nounwind
+declare void @init_addresses() #0
+
+; Function Attrs: nounwind
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %0 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+  %1 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+  call void @print_vecpred(i32 64, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*))
+  %2 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+  %call50 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([57 x i8], [57 x i8]* @.str52, i32 0, i32 0)) #3
+  %3 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+  %call52 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @.str54, i32 0, i32 0)) #3
+  %4 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+  %call300 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str290, i32 0, i32 0)) #3
+  %5 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+  %6 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+  %call1373 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @.str243, i32 0, i32 0)) #3
+  %7 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+  %call1381 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str251, i32 0, i32 0)) #3
+  %8 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+  %9 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
+  call void @print_vector(i32 64, i8* bitcast (<16 x i32>* @VectorResult to i8*))
+  %10 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+  %11 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
+  %12 = bitcast <512 x i1> %11 to <16 x i32>
+  %13 = bitcast <16 x i32> %12 to <512 x i1>
+  %14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %13, <16 x i32> undef, <16 x i32> undef)
+  store <16 x i32> %14, <16 x i32>* @VectorResult, align 64
+  ret i32 0
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
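
Unlike the other tests, this one runs at -O0 and only checks that a vmem instruction is emitted at all; the point is that the file compiles cleanly. The name suggests it is a regression test for a register-scavenger failure, plausibly provoked by the <512 x i1> <-> <16 x i32> bitcast round trip, which expands into Q-to-V and V-to-Q copies. To reproduce the RUN line by hand outside of lit, the %s substitutions expand to the test file itself:

    $ llc -O0 -march=hexagon -mcpu=hexagonv60 < reg-scavengebug-3.ll \
        | FileCheck reg-scavengebug-3.ll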

Added: llvm/trunk/test/CodeGen/Hexagon/vec-pred-spill1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vec-pred-spill1.ll?rev=260758&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vec-pred-spill1.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vec-pred-spill1.ll Fri Feb 12 16:53:35 2016
@@ -0,0 +1,80 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv60 -O2 -enable-hexagon-hvx < %s | FileCheck %s
+
+; CHECK: vmem(r{{[0-9]+}}+#3) = v{{[0-9]+}}
+; CHECK: call puts
+; CHECK: call print_vecpred
+; CHECK: v{{[0-9]+}}{{ *}}={{ *}}vmem(r{{[0-9]+}}+#3)
+
+target triple = "hexagon"
+
+@K = global i64 0, align 8
+@src = global i32 -1, align 4
+@Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64
+@dst_addresses = common global [15 x i64] zeroinitializer, align 8
+@ptr_addresses = common global [15 x i8*] zeroinitializer, align 8
+@src_addresses = common global [15 x i8*] zeroinitializer, align 8
+@ptr = common global [32768 x i32] zeroinitializer, align 8
+@vecpreds = common global [15 x <16 x i32>] zeroinitializer, align 64
+@VectorResult = common global <16 x i32> zeroinitializer, align 64
+@vectors = common global [15 x <16 x i32>] zeroinitializer, align 64
+@VectorPairResult = common global <32 x i32> zeroinitializer, align 128
+@vector_pairs = common global [15 x <32 x i32>] zeroinitializer, align 128
+@str = private unnamed_addr constant [106 x i8] c"Q6VecPred4 :  Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),INT32_MIN)\00"
+@str3 = private unnamed_addr constant [99 x i8] c"Q6VecPred4 :  Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),-1)\00"
+@str4 = private unnamed_addr constant [98 x i8] c"Q6VecPred4 :  Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),0)\00"
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %call = tail call i32 bitcast (i32 (...)* @init_addresses to i32 ()*)() #3
+  %call1 = tail call i32 @acquire_vector_unit(i8 zeroext 0) #3
+  tail call void @init_vectors() #3
+  %0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
+  %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
+  %2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+  %3 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -2147483648)
+  %4 = bitcast <512 x i1> %3 to <16 x i32>
+  store <16 x i32> %4, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([106 x i8], [106 x i8]* @str, i32 0, i32 0))
+  tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
+  %5 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -1)
+  %6 = bitcast <512 x i1> %5 to <16 x i32>
+  store <16 x i32> %6, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
+  %puts5 = tail call i32 @puts(i8* getelementptr inbounds ([99 x i8], [99 x i8]* @str3, i32 0, i32 0))
+  tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
+  %7 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 0)
+  %8 = bitcast <512 x i1> %7 to <16 x i32>
+  store <16 x i32> %8, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
+  %puts6 = tail call i32 @puts(i8* getelementptr inbounds ([98 x i8], [98 x i8]* @str4, i32 0, i32 0))
+  tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
+  ret i32 0
+}
+
+declare i32 @init_addresses(...) #1
+
+declare i32 @acquire_vector_unit(i8 zeroext) #1
+
+declare void @init_vectors() #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #2
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #2
+
+declare void @print_vecpred(i32, i8*) #1
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
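
The CHECK lines above pin down the expected spill code: the vector predicate is live across the two calls, and since Q registers have no direct store it travels through a full vector register and a vmem stack slot. A sketch of the transfer pair that the vandqrt/vandvrt intrinsics model (registers and the #3 offset are illustrative; r2 is assumed to hold the splat constant):

    v0 = vand(q0, r2)        // Q -> V: expand the predicate into a vector
    vmem(r29+#3) = v0        // spill the vector across the calls
    ...
    v0 = vmem(r29+#3)        // reload after the calls
    q0 = vand(v0, r2)        // V -> Q: rebuild the predicate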

More information about the llvm-commits mailing list