[llvm] r236896 - [Hexagon] Generate more hardware loops

Brendon Cahoon bcahoon at codeaurora.org
Fri May 8 13:18:21 PDT 2015


Author: bcahoon
Date: Fri May  8 15:18:21 2015
New Revision: 236896

URL: http://llvm.org/viewvc/llvm-project?rev=236896&view=rev
Log:
[Hexagon] Generate more hardware loops

Refactored parts of the hardware loop pass to generate
more hardware loops. Also, added more tests.

Differential Revision: http://reviews.llvm.org/D9568

Added:
    llvm/trunk/test/CodeGen/Hexagon/hwloop-missed.ll
    llvm/trunk/test/CodeGen/Hexagon/hwloop-preheader.ll
    llvm/trunk/test/CodeGen/Hexagon/hwloop1.ll
    llvm/trunk/test/CodeGen/Hexagon/hwloop2.ll
    llvm/trunk/test/CodeGen/Hexagon/hwloop3.ll
    llvm/trunk/test/CodeGen/Hexagon/hwloop4.ll
Modified:
    llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp
    llvm/trunk/test/CodeGen/Hexagon/hwloop-lt.ll

Modified: llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp?rev=236896&r1=236895&r2=236896&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp Fri May  8 15:18:21 2015
@@ -21,7 +21,6 @@
 //  - Countable loops (w/ ind. var for a trip count)
 //  - Assumes loops are normalized by IndVarSimplify
 //  - Try inner-most loops first
-//  - No nested hardware loops.
 //  - No function calls in loops.
 //
 //===----------------------------------------------------------------------===//
@@ -49,9 +48,18 @@ using namespace llvm;
 #define DEBUG_TYPE "hwloops"
 
 #ifndef NDEBUG
-static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
+static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
+
+// Option to create preheader only for a specific function.
+static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
+                                 cl::init(""));
 #endif
 
+// Option to create a preheader if one doesn't exist.
+static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
+    cl::Hidden, cl::init(true),
+    cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
+
 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
 
 namespace llvm {
@@ -87,14 +95,15 @@ namespace {
     }
 
   private:
+
     /// Kinds of comparisons in the compare instructions.
     struct Comparison {
       enum Kind {
         EQ  = 0x01,
         NE  = 0x02,
-        L   = 0x04, // Less-than property.
-        G   = 0x08, // Greater-than property.
-        U   = 0x40, // Unsigned property.
+        L   = 0x04,
+        G   = 0x08,
+        U   = 0x40,
         LTs = L,
         LEs = L | EQ,
         GTs = G,
@@ -111,6 +120,23 @@ namespace {
           return (Kind)(Cmp ^ (L|G));
         return Cmp;
       }
+
+      static Kind getNegatedComparison(Kind Cmp) {
+        if ((Cmp & L) || (Cmp & G))
+          return (Kind)((Cmp ^ (L | G)) ^ EQ);
+        if ((Cmp & NE) || (Cmp & EQ))
+          return (Kind)(Cmp ^ (EQ | NE));
+        return (Kind)0;
+      }
+
+      static bool isSigned(Kind Cmp) {
+        return (Cmp & (L | G) && !(Cmp & U));
+      }
+
+      static bool isUnsigned(Kind Cmp) {
+        return (Cmp & U);
+      }
+
     };
 
     /// \brief Find the register that contains the loop controlling
@@ -128,6 +154,12 @@ namespace {
     bool findInductionRegister(MachineLoop *L, unsigned &Reg,
                                int64_t &IVBump, MachineInstr *&IVOp) const;
 
+    /// \brief Return the comparison kind for the specified opcode.
+    Comparison::Kind getComparisonKind(unsigned CondOpc,
+                                       MachineOperand *InitialValue,
+                                       const MachineOperand *EndValue,
+                                       int64_t IVBump) const;
+
     /// \brief Analyze the statements in a loop to determine if the loop
     /// has a computable trip count and, if so, return a value that represents
     /// the trip count expression.
@@ -141,12 +173,9 @@ namespace {
     /// If the trip count is not directly available (as an immediate value,
     /// or a register), the function will attempt to insert computation of it
     /// to the loop's preheader.
-    CountValue *computeCount(MachineLoop *Loop,
-                             const MachineOperand *Start,
-                             const MachineOperand *End,
-                             unsigned IVReg,
-                             int64_t IVBump,
-                             Comparison::Kind Cmp) const;
+    CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
+                             const MachineOperand *End, unsigned IVReg,
+                             int64_t IVBump, Comparison::Kind Cmp) const;
 
     /// \brief Return true if the instruction is not valid within a hardware
     /// loop.
@@ -310,6 +339,18 @@ bool HexagonHardwareLoops::runOnMachineF
   return Changed;
 }
 
+/// \brief Return the latch block if it's one of the exiting blocks. Otherwise,
+/// return the exiting block, or 'null' if multiple exiting blocks are present.
+/// Also return 'null' when L->getLoopLatch() returns null.
+static MachineBasicBlock* getExitingBlock(MachineLoop *L) {
+  if (MachineBasicBlock *Latch = L->getLoopLatch()) {
+    if (L->isLoopExiting(Latch))
+      return Latch;
+    else
+      return L->getExitingBlock();
+  }
+  return nullptr;
+}
 
 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
                                                  unsigned &Reg,
@@ -319,7 +360,8 @@ bool HexagonHardwareLoops::findInduction
   MachineBasicBlock *Header = L->getHeader();
   MachineBasicBlock *Preheader = L->getLoopPreheader();
   MachineBasicBlock *Latch = L->getLoopLatch();
-  if (!Header || !Preheader || !Latch)
+  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+  if (!Header || !Preheader || !Latch || !ExitingBlock)
     return false;
 
   // This pair represents an induction register together with an immediate
@@ -366,10 +408,10 @@ bool HexagonHardwareLoops::findInduction
 
   SmallVector<MachineOperand,2> Cond;
   MachineBasicBlock *TB = nullptr, *FB = nullptr;
-  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+  bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
   if (NotAnalyzed)
     return false;
-  
+
   unsigned PredR, PredPos, PredRegFlags;
   if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
     return false;
@@ -384,7 +426,7 @@ bool HexagonHardwareLoops::findInduction
                                          CmpMask, CmpImm);
   // Fail if the compare was not analyzed, or it's not comparing a register
   // with an immediate value.  Not checking the mask here, since we handle
-  // the individual compare opcodes (including CMPb) later on.
+  // the individual compare opcodes (including A4_cmpb*) later on.
   if (!CmpAnalyzed)
     return false;
 
@@ -414,6 +456,44 @@ bool HexagonHardwareLoops::findInduction
   return true;
 }
 
+// Map a compare opcode to its Comparison::Kind; yields 0 if unrecognized.
+HexagonHardwareLoops::Comparison::Kind
+HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
+                                        MachineOperand *InitialValue,
+                                        const MachineOperand *EndValue,
+                                        int64_t IVBump) const {
+  Comparison::Kind Cmp = (Comparison::Kind)0;
+  switch (CondOpc) {
+  case Hexagon::C2_cmpeqi:
+  case Hexagon::C2_cmpeq:
+  case Hexagon::C2_cmpeqp:
+    Cmp = Comparison::Kind::EQ;
+    break;
+  case Hexagon::C4_cmpneq:
+  case Hexagon::C4_cmpneqi:
+    Cmp = Comparison::Kind::NE;
+    break;
+  case Hexagon::C4_cmplte:
+    Cmp = Comparison::Kind::LEs;
+    break;
+  case Hexagon::C4_cmplteu:
+    Cmp = Comparison::Kind::LEu;
+    break;
+  case Hexagon::C2_cmpgtui:
+  case Hexagon::C2_cmpgtu:
+  case Hexagon::C2_cmpgtup:
+    Cmp = Comparison::Kind::GTu;
+    break;
+  case Hexagon::C2_cmpgti:
+  case Hexagon::C2_cmpgt:
+  case Hexagon::C2_cmpgtp:
+    Cmp = Comparison::Kind::GTs;
+    break;
+  default:
+    return (Comparison::Kind)0;
+  }
+  return Cmp;
+}
 
 /// \brief Analyze the statements in a loop to determine if the loop has
 /// a computable trip count and, if so, return a value that represents
@@ -423,7 +503,7 @@ bool HexagonHardwareLoops::findInduction
 /// induction variable patterns that are used in the calculation for
 /// the number of time the loop is executed.
 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
-                                    SmallVectorImpl<MachineInstr *> &OldInsts) {
+    SmallVectorImpl<MachineInstr *> &OldInsts) {
   MachineBasicBlock *TopMBB = L->getTopBlock();
   MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
   assert(PI != TopMBB->pred_end() &&
@@ -447,8 +527,8 @@ CountValue *HexagonHardwareLoops::getLoo
   // Look for the cmp instruction to determine if we can get a useful trip
   // count.  The trip count can be either a register or an immediate.  The
   // location of the value depends upon the type (reg or imm).
-  MachineBasicBlock *Latch = L->getLoopLatch();
-  if (!Latch)
+  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+  if (!ExitingBlock)
     return nullptr;
 
   unsigned IVReg = 0;
@@ -462,6 +542,7 @@ CountValue *HexagonHardwareLoops::getLoo
 
   MachineOperand *InitialValue = nullptr;
   MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
+  MachineBasicBlock *Latch = L->getLoopLatch();
   for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
     MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
     if (MBB == Preheader)
@@ -483,6 +564,17 @@ CountValue *HexagonHardwareLoops::getLoo
   // the header.  Otherwise, branch to TB could be exiting the loop, and
   // the fall through can go to the header.
   assert (TB && "Latch block without a branch?");
+  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
+    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
+    SmallVector<MachineOperand,2> LCond;
+    bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
+    if (NotAnalyzed)
+      return nullptr;
+    if (TB == Latch) // Look through the latch to the block it branches to.
+      TB = (LTB == Header) ? LTB : LFB;
+    else // FB == Latch
+      FB = (LTB == Header) ? LTB : LFB;
+  }
   assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
   if (!TB || (FB && TB != Header && FB != Header))
     return nullptr;
@@ -533,57 +625,13 @@ CountValue *HexagonHardwareLoops::getLoo
   if (!EndValue)
     return nullptr;
 
-  switch (CondOpc) {
-    case Hexagon::C2_cmpeqi:
-    case Hexagon::C2_cmpeq:
-      Cmp = !Negated ? Comparison::EQ : Comparison::NE;
-      break;
-    case Hexagon::C2_cmpgtui:
-    case Hexagon::C2_cmpgtu:
-      Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
-      break;
-    case Hexagon::C2_cmpgti:
-    case Hexagon::C2_cmpgt:
-      Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
-      break;
-    // Very limited support for byte/halfword compares.
-    case Hexagon::A4_cmpbeqi:
-    case Hexagon::A4_cmpheqi: {
-      if (IVBump != 1)
-        return nullptr;
-
-      int64_t InitV, EndV;
-      // Since the comparisons are "ri", the EndValue should be an
-      // immediate.  Check it just in case.
-      assert(EndValue->isImm() && "Unrecognized latch comparison");
-      EndV = EndValue->getImm();
-      // Allow InitialValue to be a register defined with an immediate.
-      if (InitialValue->isReg()) {
-        if (!defWithImmediate(InitialValue->getReg()))
-          return nullptr;
-        InitV = getImmediate(*InitialValue);
-      } else {
-        assert(InitialValue->isImm());
-        InitV = InitialValue->getImm();
-      }
-      if (InitV >= EndV)
-        return nullptr;
-      if (CondOpc == Hexagon::A4_cmpbeqi) {
-        if (!isInt<8>(InitV) || !isInt<8>(EndV))
-          return nullptr;
-      } else {  // Hexagon::CMPhEQri_V4
-        if (!isInt<16>(InitV) || !isInt<16>(EndV))
-          return nullptr;
-      }
-      Cmp = !Negated ? Comparison::EQ : Comparison::NE;
-      break;
-    }
-    default:
-      return nullptr;
-  }
-
+  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
+  if (!Cmp)
+    return nullptr;
+  if (Negated)
+    Cmp = Comparison::getNegatedComparison(Cmp);
   if (isSwapped)
-   Cmp = Comparison::getSwappedComparison(Cmp);
+    Cmp = Comparison::getSwappedComparison(Cmp);
 
   if (InitialValue->isReg()) {
     unsigned R = InitialValue->getReg();
@@ -637,13 +685,14 @@ CountValue *HexagonHardwareLoops::comput
   bool CmpHasEqual = Cmp & Comparison::EQ;
 
   // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
-  // If loop executes while iv is "less" with the iv value going down, then
-  // the iv must wrap.
   if (CmpLess && IVBump < 0)
+    // Loop going while iv is "less" with the iv value going down.  Must wrap.
     return nullptr;
+
   // If loop executes while iv is "greater" with the iv value going up, then
   // the iv must wrap.
   if (CmpGreater && IVBump > 0)
+    // Loop going while iv is "greater" with the iv value going up.  Must wrap.
     return nullptr;
 
   if (Start->isImm() && End->isImm()) {
@@ -698,8 +747,9 @@ CountValue *HexagonHardwareLoops::comput
   MachineBasicBlock *PH = Loop->getLoopPreheader();
   assert (PH && "Should have a preheader by now");
   MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
-  DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
-                                         : DebugLoc();
+  DebugLoc DL; // Stays empty if the preheader has no terminator.
+  if (InsertPos != PH->end())
+    DL = InsertPos->getDebugLoc();
 
   // If Start is an immediate and End is a register, the trip count
   // will be "reg - imm".  Hexagon's "subtract immediate" instruction
@@ -778,21 +828,35 @@ CountValue *HexagonHardwareLoops::comput
     const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
                               (RegToImm ? TII->get(Hexagon::A2_subri) :
                                           TII->get(Hexagon::A2_addi));
-    unsigned SubR = MRI->createVirtualRegister(IntRC);
-    MachineInstrBuilder SubIB =
-      BuildMI(*PH, InsertPos, DL, SubD, SubR);
-
-    if (RegToReg) {
-      SubIB.addReg(End->getReg(), 0, End->getSubReg())
-           .addReg(Start->getReg(), 0, Start->getSubReg());
-    } else if (RegToImm) {
-      SubIB.addImm(EndV)
-           .addReg(Start->getReg(), 0, Start->getSubReg());
-    } else { // ImmToReg
-      SubIB.addReg(End->getReg(), 0, End->getSubReg())
-           .addImm(-StartV);
+    if (RegToReg || RegToImm) {    
+      unsigned SubR = MRI->createVirtualRegister(IntRC);
+      MachineInstrBuilder SubIB =
+        BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+      if (RegToReg)
+        SubIB.addReg(End->getReg(), 0, End->getSubReg())
+          .addReg(Start->getReg(), 0, Start->getSubReg());
+      else
+        SubIB.addImm(EndV)
+          .addReg(Start->getReg(), 0, Start->getSubReg());
+      DistR = SubR;
+    } else {
+      // If the loop has been unrolled, we should use the original loop count
+      // instead of recalculating the value. This will avoid additional
+      // 'Add' instruction.
+      const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+      if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
+          EndValInstr->getOperand(2).getImm() == StartV) {
+        DistR = EndValInstr->getOperand(1).getReg();
+      } else {
+        unsigned SubR = MRI->createVirtualRegister(IntRC);
+        MachineInstrBuilder SubIB =
+          BuildMI(*PH, InsertPos, DL, SubD, SubR);
+        SubIB.addReg(End->getReg(), 0, End->getSubReg())
+             .addImm(-StartV);
+        DistR = SubR;
+      }
     }
-    DistR = SubR;
     DistSR = 0;
   }
 
@@ -843,8 +907,9 @@ CountValue *HexagonHardwareLoops::comput
 bool HexagonHardwareLoops::isInvalidLoopOperation(
       const MachineInstr *MI) const {
 
-  // call is not allowed because the callee may use a hardware loop
-  if (MI->getDesc().isCall())
+  // Call is not allowed because the callee may use a hardware loop except for
+  // the case when the call never returns.
+  if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
     return true;
 
   // do not allow nested hardware loops
@@ -959,8 +1024,6 @@ void HexagonHardwareLoops::removeIfDead(
           continue;
         if (Use.isDebug())
           UseMI->getOperand(0).setReg(0U);
-        // This may also be a "instr -> phi -> instr" case which can
-        // be removed too.
       }
     }
 
@@ -1005,10 +1068,6 @@ bool HexagonHardwareLoops::convertToHard
   if (containsInvalidInstruction(L))
     return false;
 
-  // Is the induction variable bump feeding the latch condition?
-  if (!fixupInductionVariable(L))
-    return false;
-
   MachineBasicBlock *LastMBB = L->getExitingBlock();
   // Don't generate hw loop if the loop has more than one exit.
   if (!LastMBB)
@@ -1018,16 +1077,19 @@ bool HexagonHardwareLoops::convertToHard
   if (LastI == LastMBB->end())
     return false;
 
+  // Is the induction variable bump feeding the latch condition?
+  if (!fixupInductionVariable(L))
+    return false;
+
   // Ensure the loop has a preheader: the loop instruction will be
   // placed there.
-  bool NewPreheader = false;
   MachineBasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
     Preheader = createPreheaderForLoop(L);
     if (!Preheader)
       return false;
-    NewPreheader = true;
   }
+
   MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
 
   SmallVector<MachineInstr*, 2> OldInsts;
@@ -1042,31 +1104,30 @@ bool HexagonHardwareLoops::convertToHard
     // so make sure that the register is actually defined at that point.
     MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
     MachineBasicBlock *BBDef = TCDef->getParent();
-    if (!NewPreheader) {
-      if (!MDT->dominates(BBDef, Preheader))
-        return false;
-    } else {
-      // If we have just created a preheader, the dominator tree won't be
-      // aware of it.  Check if the definition of the register dominates
-      // the header, but is not the header itself.
-      if (!MDT->properlyDominates(BBDef, L->getHeader()))
-        return false;
-    }
+    if (!MDT->dominates(BBDef, Preheader))
+      return false;
   }
 
   // Determine the loop start.
-  MachineBasicBlock *LoopStart = L->getTopBlock();
-  if (L->getLoopLatch() != LastMBB) {
-    // When the exit and latch are not the same, use the latch block as the
-    // start.
-    // The loop start address is used only after the 1st iteration, and the
-    // loop latch may contains instrs. that need to be executed after the
-    // first iteration.
-    LoopStart = L->getLoopLatch();
-    // Make sure the latch is a successor of the exit, otherwise it won't work.
-    if (!LastMBB->isSuccessor(LoopStart))
+  MachineBasicBlock *TopBlock = L->getTopBlock();
+  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+  MachineBasicBlock *LoopStart = nullptr;
+  if (ExitingBlock != L->getLoopLatch()) {
+    MachineBasicBlock *TB = nullptr, *FB = nullptr;
+    SmallVector<MachineOperand, 2> Cond;
+
+    if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false))
+      return false;
+
+    if (L->contains(TB))
+      LoopStart = TB;
+    else if (L->contains(FB))
+      LoopStart = FB;
+    else
       return false;
   }
+  else // ExitingBlock is the latch.
+    LoopStart = TopBlock;
 
   // Convert the loop to a hardware loop.
   DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
@@ -1220,13 +1281,7 @@ void HexagonHardwareLoops::setImmediate(
 
   assert(MO.isReg());
   unsigned R = MO.getReg();
-  MachineInstr *DI = defWithImmediate(R);
-  if (MRI->hasOneNonDBGUse(R)) {
-    // If R has only one use, then just change its defining instruction to
-    // the new immediate value.
-    DI->getOperand(1).setImm(Val);
-    return;
-  }
+  MachineInstr *DI = MRI->getVRegDef(R);
 
   const TargetRegisterClass *RC = MRI->getRegClass(R);
   unsigned NewR = MRI->createVirtualRegister(RC);
@@ -1240,10 +1295,10 @@ void HexagonHardwareLoops::setImmediate(
 
 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
   MachineBasicBlock *Header = L->getHeader();
-  MachineBasicBlock *Preheader = L->getLoopPreheader();
   MachineBasicBlock *Latch = L->getLoopLatch();
+  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
 
-  if (!Header || !Preheader || !Latch)
+  if (!(Header && Latch && ExitingBlock))
     return false;
 
   // These data structures follow the same concept as the corresponding
@@ -1271,7 +1326,7 @@ bool HexagonHardwareLoops::fixupInductio
       unsigned PhiReg = Phi->getOperand(i).getReg();
       MachineInstr *DI = MRI->getVRegDef(PhiReg);
       unsigned UpdOpc = DI->getOpcode();
-      bool isAdd = (UpdOpc == Hexagon::A2_addi);
+      bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
 
       if (isAdd) {
         // If the register operand to the add/sub is the PHI we are looking
@@ -1412,12 +1467,21 @@ MachineBasicBlock *HexagonHardwareLoops:
   if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
     return TmpPH;
 
+  if (!HWCreatePreheader)
+    return nullptr;
+
   MachineBasicBlock *Header = L->getHeader();
   MachineBasicBlock *Latch = L->getLoopLatch();
+  MachineBasicBlock *ExitingBlock = getExitingBlock(L);
   MachineFunction *MF = Header->getParent();
   DebugLoc DL;
 
-  if (!Latch || Header->hasAddressTaken())
+#ifndef NDEBUG
+  if ((PHFn != "") && (PHFn != MF->getName()))
+    return nullptr;
+#endif
+
+  if (!Latch || !ExitingBlock || Header->hasAddressTaken())
     return nullptr;
 
   typedef MachineBasicBlock::instr_iterator instr_iterator;
@@ -1429,16 +1493,14 @@ MachineBasicBlock *HexagonHardwareLoops:
   SmallVector<MachineOperand,2> Tmp1;
   MachineBasicBlock *TB = nullptr, *FB = nullptr;
 
-  if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
+  if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
     return nullptr;
 
   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
     MachineBasicBlock *PB = *I;
-    if (PB != Latch) {
-      bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
-      if (NotAnalyzed)
-        return nullptr;
-    }
+    bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
+    if (NotAnalyzed)
+      return nullptr;
   }
 
   MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
@@ -1541,5 +1603,16 @@ MachineBasicBlock *HexagonHardwareLoops:
   TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
   NewPH->addSuccessor(Header);
 
+  MachineLoop *ParentLoop = L->getParentLoop();
+  if (ParentLoop)
+    ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
+
+  // Update the dominator information with the new preheader.
+  if (MDT) {
+    MachineDomTreeNode *HDom = MDT->getNode(Header);
+    MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock());
+    MDT->changeImmediateDominator(Header, NewPH);
+  }
+
   return NewPH;
 }

Modified: llvm/trunk/test/CodeGen/Hexagon/hwloop-lt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop-lt.ll?rev=236896&r1=236895&r2=236896&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hwloop-lt.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/hwloop-lt.ll Fri May  8 15:18:21 2015
@@ -1,7 +1,6 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
 
-
-; CHECK: test_pos1_ir_slt
+; CHECK-LABEL: @test_pos1_ir_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -9,10 +8,10 @@ entry:
   %cmp3 = icmp slt i32 8531, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -24,13 +23,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos2_ir_slt
+; CHECK-LABEL: @test_pos2_ir_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -38,10 +35,10 @@ entry:
   %cmp3 = icmp slt i32 9152, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -53,13 +50,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos4_ir_slt
+; CHECK-LABEL: @test_pos4_ir_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -67,10 +62,10 @@ entry:
   %cmp3 = icmp slt i32 18851, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -82,13 +77,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos8_ir_slt
+; CHECK-LABEL: @test_pos8_ir_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -96,10 +89,10 @@ entry:
   %cmp3 = icmp slt i32 25466, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -111,13 +104,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos16_ir_slt
+; CHECK-LABEL: @test_pos16_ir_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -125,10 +116,10 @@ entry:
   %cmp3 = icmp slt i32 9295, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -140,13 +131,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos1_ri_slt
+; CHECK-LABEL: @test_pos1_ri_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -154,10 +143,10 @@ entry:
   %cmp3 = icmp slt i32 %a, 31236
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -169,13 +158,11 @@ for.body:
   %cmp = icmp slt i32 %inc, 31236
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos2_ri_slt
+; CHECK-LABEL: @test_pos2_ri_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -183,10 +170,10 @@ entry:
   %cmp3 = icmp slt i32 %a, 22653
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -198,13 +185,11 @@ for.body:
   %cmp = icmp slt i32 %inc, 22653
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos4_ri_slt
+; CHECK-LABEL: @test_pos4_ri_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -212,10 +197,10 @@ entry:
   %cmp3 = icmp slt i32 %a, 1431
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -227,13 +212,11 @@ for.body:
   %cmp = icmp slt i32 %inc, 1431
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos8_ri_slt
+; CHECK-LABEL: @test_pos8_ri_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -241,10 +224,10 @@ entry:
   %cmp3 = icmp slt i32 %a, 22403
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -256,13 +239,11 @@ for.body:
   %cmp = icmp slt i32 %inc, 22403
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos16_ri_slt
+; CHECK-LABEL: @test_pos16_ri_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -270,10 +251,10 @@ entry:
   %cmp3 = icmp slt i32 %a, 21715
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -285,13 +266,11 @@ for.body:
   %cmp = icmp slt i32 %inc, 21715
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos1_rr_slt
+; CHECK-LABEL: @test_pos1_rr_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -299,10 +278,10 @@ entry:
   %cmp3 = icmp slt i32 %a, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -314,13 +293,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos2_rr_slt
+; CHECK-LABEL: @test_pos2_rr_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -328,10 +305,10 @@ entry:
   %cmp3 = icmp slt i32 %a, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -343,13 +320,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos4_rr_slt
+; CHECK-LABEL: @test_pos4_rr_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -357,10 +332,10 @@ entry:
   %cmp3 = icmp slt i32 %a, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -372,13 +347,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos8_rr_slt
+; CHECK-LABEL: @test_pos8_rr_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -386,10 +359,10 @@ entry:
   %cmp3 = icmp slt i32 %a, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -401,13 +374,11 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 
-
-
-; CHECK: test_pos16_rr_slt
+; CHECK-LABEL: @test_pos16_rr_slt
 ; CHECK: loop0
 ; a < b
 define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -415,10 +386,10 @@ entry:
   %cmp3 = icmp slt i32 %a, %b
   br i1 %cmp3, label %for.body.lr.ph, label %for.end
 
-for.body.lr.ph:                                   ; preds = %entry
+for.body.lr.ph:
   br label %for.body
 
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
+for.body:
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
   %0 = load i8, i8* %arrayidx, align 1
@@ -430,7 +401,7 @@ for.body:
   %cmp = icmp slt i32 %inc, %b
   br i1 %cmp, label %for.body, label %for.end
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 

Added: llvm/trunk/test/CodeGen/Hexagon/hwloop-missed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop-missed.ll?rev=236896&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hwloop-missed.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hwloop-missed.ll Fri May  8 15:18:21 2015
@@ -0,0 +1,49 @@
+; RUN: llc -march=hexagon -hexagon-hwloop-preheader < %s | FileCheck %s
+
+; Generate hardware loops when we also need to add a new preheader.
+; we should generate two hardware loops for this test case.
+
+; CHECK: loop0
+; CHECK: endloop0
+; CHECK: loop0
+; CHECK: endloop0
+
+@g = external global i32
+
+define void @test(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
+entry:
+  %tobool = icmp eq i32 %n, 0
+  br i1 %tobool, label %for.body4.preheader, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %a, %for.body.preheader ]
+  %i.014 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %0 = load i32, i32* @g, align 4
+  store i32 %0, i32* %arrayidx.phi, align 4
+  %inc = add nsw i32 %i.014, 1
+  %exitcond15 = icmp eq i32 %inc, 3
+  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+  br i1 %exitcond15, label %for.body4.preheader.loopexit, label %for.body
+
+for.body4.preheader.loopexit:
+  br label %for.body4.preheader
+
+for.body4.preheader:
+  br label %for.body4
+
+for.body4:
+  %arrayidx5.phi = phi i32* [ %arrayidx5.inc, %for.body4 ], [ %b, %for.body4.preheader ]
+  %i1.013 = phi i32 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+  %1 = load i32, i32* @g, align 4
+  store i32 %1, i32* %arrayidx5.phi, align 4
+  %inc7 = add nsw i32 %i1.013, 1
+  %exitcond = icmp eq i32 %inc7, 3
+  %arrayidx5.inc = getelementptr i32, i32* %arrayidx5.phi, i32 1
+  br i1 %exitcond, label %for.end8, label %for.body4
+
+for.end8:
+  ret void
+}

Added: llvm/trunk/test/CodeGen/Hexagon/hwloop-preheader.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop-preheader.ll?rev=236896&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hwloop-preheader.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hwloop-preheader.ll Fri May  8 15:18:21 2015
@@ -0,0 +1,40 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -hexagon-hwloop-preheader < %s
+; REQUIRES: asserts
+
+; Test that the preheader is added to the parent loop, otherwise
+; we generate an invalid hardware loop.
+
+; Function Attrs: nounwind readonly
+define void @test(i16 signext %n) #0 {
+entry:
+  br i1 undef, label %for.cond4.preheader.preheader.split.us, label %for.end22
+
+for.cond4.preheader.preheader.split.us:
+  %0 = sext i16 %n to i32
+  br label %for.body9.preheader.us
+
+for.body9.us:
+  %indvars.iv = phi i32 [ %indvars.iv.next.7, %for.body9.us ], [ 0, %for.body9.preheader.us ]
+  %indvars.iv.next.7 = add i32 %indvars.iv, 8
+  %lftr.wideiv.7 = trunc i32 %indvars.iv.next.7 to i16
+  %exitcond.7 = icmp slt i16 %lftr.wideiv.7, 0
+  br i1 %exitcond.7, label %for.body9.us, label %for.body9.us.ur
+
+for.body9.preheader.us:
+  %i.030.us.pmt = phi i32 [ %inc21.us.pmt, %for.end.loopexit.us ], [ 0, %for.cond4.preheader.preheader.split.us ]
+  br i1 undef, label %for.body9.us, label %for.body9.us.ur
+
+for.body9.us.ur:
+  %exitcond.ur.old = icmp eq i16 undef, %n
+  br i1 %exitcond.ur.old, label %for.end.loopexit.us, label %for.body9.us.ur
+
+for.end.loopexit.us:
+  %inc21.us.pmt = add i32 %i.030.us.pmt, 1
+  %exitcond33 = icmp eq i32 %inc21.us.pmt, %0
+  br i1 %exitcond33, label %for.end22, label %for.body9.preheader.us
+
+for.end22:
+  ret void
+}
+
+attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }

Added: llvm/trunk/test/CodeGen/Hexagon/hwloop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop1.ll?rev=236896&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hwloop1.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hwloop1.ll Fri May  8 15:18:21 2015
@@ -0,0 +1,161 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Check that we generate hardware loop instructions.
+
+; Case 1 : Loop with a constant number of iterations.
+; CHECK-LABEL: @hwloop1
+; CHECK: loop0(.LBB{{.}}_{{.}}, #10)
+; CHECK: endloop0
+
+@a = common global [10 x i32] zeroinitializer, align 4
+define i32 @hwloop1() nounwind {
+entry:
+  br label %for.body
+for.body:
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @a, i32 0, i32 %i.01
+  store i32 %i.01, i32* %arrayidx, align 4
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 10
+  br i1 %exitcond, label %for.end, label %for.body
+for.end:
+  ret i32 0
+}
+
+; Case 2 : Loop with a run-time number of iterations.
+; CHECK-LABEL: @hwloop2
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define i32 @hwloop2(i32 %n, i32* nocapture %b) nounwind {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %a.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.02
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %a.03
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  %a.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
+  ret i32 %a.0.lcssa
+}
+
+; Case 3 : Induction variable increment more than 1.
+; CHECK-LABEL: @hwloop3
+; CHECK: lsr(r{{[0-9]+}}, #2)
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define i32 @hwloop3(i32 %n, i32* nocapture %b) nounwind {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %a.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.02
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %a.03
+  %inc = add nsw i32 %i.02, 4
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  %a.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
+  ret i32 %a.0.lcssa
+}
+
+; Case 4 : Loop exit compare uses register instead of immediate value.
+; CHECK-LABEL: @hwloop4
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define i32 @hwloop4(i32 %n, i32* nocapture %b) nounwind {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.02
+  store i32 %i.02, i32* %arrayidx, align 4
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret i32 0
+}
+
+; Case 5: After LSR, the initial value is 100 and the iv decrements to 0.
+; CHECK-LABEL: @hwloop5
+; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
+; CHECK: endloop0
+
+define void @hwloop5(i32* nocapture %a, i32* nocapture %res) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.03
+  %0 = load i32, i32* %arrayidx, align 4
+  %mul = mul nsw i32 %0, %0
+  %arrayidx2 = getelementptr inbounds i32, i32* %res, i32 %i.03
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; Case 6: Large immediate offset
+; CHECK-LABEL: @hwloop6
+; CHECK-NOT: loop0(.LBB{{.}}_{{.}}, #1024)
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define void @hwloop6(i32* nocapture %a, i32* nocapture %res) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.02
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %res, i32 %i.02
+  store i32 %0, i32* %arrayidx1, align 4
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/CodeGen/Hexagon/hwloop2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop2.ll?rev=236896&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hwloop2.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hwloop2.ll Fri May  8 15:18:21 2015
@@ -0,0 +1,37 @@
+; RUN: llc -disable-lsr -march=hexagon < %s | FileCheck %s
+
+; Test for multiple phis with induction variables.
+
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define i32 @hwloop4(i32* nocapture %s, i32* nocapture %a, i32 %n) {
+entry:
+  %cmp3 = icmp eq i32 %n, 0
+  br i1 %cmp3, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:
+  %.pre = load i32, i32* %s, align 4
+  br label %for.body
+
+for.body:
+  %0 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add1, %for.body ]
+  %j.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add2, %for.body ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %n, %for.body.lr.ph ]
+  %lsr.iv1 = phi i32* [ %scevgep, %for.body ], [ %a, %for.body.lr.ph ]
+  %1 = load i32, i32* %lsr.iv1, align 4
+  %add1 = add nsw i32 %0, %1
+  store i32 %add1, i32* %s, align 4
+  %add2 = add nsw i32 %j.05, 1
+  %lsr.iv.next = add i32 %lsr.iv, -1
+  %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1
+  %cmp = icmp eq i32 %lsr.iv.next, 0
+  br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  %j.0.lcssa = phi i32 [ 0, %entry ], [ %add2, %for.end.loopexit ]
+  ret i32 %j.0.lcssa
+}

Added: llvm/trunk/test/CodeGen/Hexagon/hwloop3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop3.ll?rev=236896&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hwloop3.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hwloop3.ll Fri May  8 15:18:21 2015
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; Remove the unconditional jump to following instruction.
+
+; CHECK: endloop0
+; CHECK-NOT: jump [[L1:.]]
+; CHECK-NOT: [[L1]]
+
+define void @test(i32* nocapture %a, i32 %n) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = load i32, i32* %arrayidx.phi, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %arrayidx.phi, align 4
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, 100
+  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+

Added: llvm/trunk/test/CodeGen/Hexagon/hwloop4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop4.ll?rev=236896&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hwloop4.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hwloop4.ll Fri May  8 15:18:21 2015
@@ -0,0 +1,76 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Remove the unnecessary 'add' instruction used for the hardware loop setup.
+
+; CHECK: [[OP0:r[0-9]+]] = add([[OP1:r[0-9]+]], #-[[OP2:[0-9]+]]
+; CHECK-NOT: add([[OP0]], #[[OP2]])
+; CHECK: lsr([[OP1]], #{{[0-9]+}})
+; CHECK: loop0
+
+define void @matrix_mul_matrix(i32 %N, i32* nocapture %C, i16* nocapture readnone %A, i16* nocapture readnone %B) #0 {
+entry:
+  %cmp4 = icmp eq i32 %N, 0
+  br i1 %cmp4, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  %maxval = add i32 %N, -7
+  %0 = icmp sgt i32 %maxval, 0
+  br i1 %0, label %for.body.preheader9, label %for.body.ur.preheader
+
+for.body.preheader9:
+  br label %for.body
+
+for.body:
+  %arrayidx.phi = phi i32* [ %arrayidx.inc.7, %for.body ], [ %C, %for.body.preheader9 ]
+  %i.05 = phi i32 [ %inc.7, %for.body ], [ 0, %for.body.preheader9 ]
+  store i32 %i.05, i32* %arrayidx.phi, align 4
+  %inc = add i32 %i.05, 1
+  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+  store i32 %inc, i32* %arrayidx.inc, align 4
+  %inc.1 = add i32 %i.05, 2
+  %arrayidx.inc.1 = getelementptr i32, i32* %arrayidx.phi, i32 2
+  store i32 %inc.1, i32* %arrayidx.inc.1, align 4
+  %inc.2 = add i32 %i.05, 3
+  %arrayidx.inc.2 = getelementptr i32, i32* %arrayidx.phi, i32 3
+  store i32 %inc.2, i32* %arrayidx.inc.2, align 4
+  %inc.3 = add i32 %i.05, 4
+  %arrayidx.inc.3 = getelementptr i32, i32* %arrayidx.phi, i32 4
+  store i32 %inc.3, i32* %arrayidx.inc.3, align 4
+  %inc.4 = add i32 %i.05, 5
+  %arrayidx.inc.4 = getelementptr i32, i32* %arrayidx.phi, i32 5
+  store i32 %inc.4, i32* %arrayidx.inc.4, align 4
+  %inc.5 = add i32 %i.05, 6
+  %arrayidx.inc.5 = getelementptr i32, i32* %arrayidx.phi, i32 6
+  store i32 %inc.5, i32* %arrayidx.inc.5, align 4
+  %inc.6 = add i32 %i.05, 7
+  %arrayidx.inc.6 = getelementptr i32, i32* %arrayidx.phi, i32 7
+  store i32 %inc.6, i32* %arrayidx.inc.6, align 4
+  %inc.7 = add i32 %i.05, 8
+  %exitcond.7 = icmp slt i32 %inc.7, %maxval
+  %arrayidx.inc.7 = getelementptr i32, i32* %arrayidx.phi, i32 8
+  br i1 %exitcond.7, label %for.body, label %for.end.loopexit.ur-lcssa
+
+for.end.loopexit.ur-lcssa:
+  %1 = icmp eq i32 %inc.7, %N
+  br i1 %1, label %for.end, label %for.body.ur.preheader
+
+for.body.ur.preheader:
+  %arrayidx.phi.ur.ph = phi i32* [ %C, %for.body.preheader ], [ %arrayidx.inc.7, %for.end.loopexit.ur-lcssa ]
+  %i.05.ur.ph = phi i32 [ 0, %for.body.preheader ], [ %inc.7, %for.end.loopexit.ur-lcssa ]
+  br label %for.body.ur
+
+for.body.ur:
+  %arrayidx.phi.ur = phi i32* [ %arrayidx.inc.ur, %for.body.ur ], [ %arrayidx.phi.ur.ph, %for.body.ur.preheader ]
+  %i.05.ur = phi i32 [ %inc.ur, %for.body.ur ], [ %i.05.ur.ph, %for.body.ur.preheader ]
+  store i32 %i.05.ur, i32* %arrayidx.phi.ur, align 4
+  %inc.ur = add i32 %i.05.ur, 1
+  %exitcond.ur = icmp eq i32 %inc.ur, %N
+  %arrayidx.inc.ur = getelementptr i32, i32* %arrayidx.phi.ur, i32 1
+  br i1 %exitcond.ur, label %for.end.loopexit, label %for.body.ur
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}





More information about the llvm-commits mailing list