[llvm] r293213 - [Hexagon] Add Hexagon-specific loop idiom recognition pass

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 27 12:51:53 PST 2017


Hi Galina,
Fixed in r293331.

-Krzysztof

On 1/27/2017 1:57 PM, Galina Kistanova wrote:
> Hello Krzysztof,
>
> This commit added a warning to one of our builders:
>
> llvm.src/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp:1083:8:
> warning: variable ‘IsVolatile’ set but not used [-Wunused-but-set-variable]
>
> http://lab.llvm.org:8011/builders/clang-3stage-ubuntu
>
> Could you please have a look at this?
>
> Thanks
>
> Galina
>
>
> On Thu, Jan 26, 2017 at 1:41 PM, Krzysztof Parzyszek via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>
>     Author: kparzysz
>     Date: Thu Jan 26 15:41:10 2017
>     New Revision: 293213
>
>     URL: http://llvm.org/viewvc/llvm-project?rev=293213&view=rev
>     Log:
>     [Hexagon] Add Hexagon-specific loop idiom recognition pass
>
>     Added:
>         llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
>         llvm/trunk/test/CodeGen/Hexagon/loop-idiom/
>         llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove1.ll
>         llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll
>         llvm/trunk/test/CodeGen/Hexagon/loop-idiom/lcssa.ll
>         llvm/trunk/test/CodeGen/Hexagon/loop-idiom/nullptr-crash.ll
>         llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy.ll
>     Modified:
>         llvm/trunk/lib/Target/Hexagon/CMakeLists.txt
>         llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.cpp
>         llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.h
>
>     Modified: llvm/trunk/lib/Target/Hexagon/CMakeLists.txt
>     URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/CMakeLists.txt?rev=293213&r1=293212&r2=293213&view=diff
>     ==============================================================================
>     --- llvm/trunk/lib/Target/Hexagon/CMakeLists.txt (original)
>     +++ llvm/trunk/lib/Target/Hexagon/CMakeLists.txt Thu Jan 26 15:41:10 2017
>     @@ -35,6 +35,7 @@ add_llvm_target(HexagonCodeGen
>        HexagonInstrInfo.cpp
>        HexagonISelDAGToDAG.cpp
>        HexagonISelLowering.cpp
>     +  HexagonLoopIdiomRecognition.cpp
>        HexagonMachineFunctionInfo.cpp
>        HexagonMachineScheduler.cpp
>        HexagonMCInstLower.cpp
>
>     Added: llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
>     URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp?rev=293213&view=auto
>     ==============================================================================
>     --- llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp (added)
>     +++ llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp Thu Jan 26 15:41:10 2017
>     @@ -0,0 +1,1618 @@
>     +//===--- HexagonLoopIdiomRecognition.cpp ----------------------------------===//
>     +//
>     +//                     The LLVM Compiler Infrastructure
>     +//
>     +// This file is distributed under the University of Illinois Open Source
>     +// License. See LICENSE.TXT for details.
>     +//
>     +//===----------------------------------------------------------------------===//
>     +
>     +#define DEBUG_TYPE "hexagon-lir"
>     +
>     +#include "llvm/ADT/SetVector.h"
>     +#include "llvm/ADT/SmallSet.h"
>     +#include "llvm/Analysis/AliasAnalysis.h"
>     +#include "llvm/Analysis/InstructionSimplify.h"
>     +#include "llvm/Analysis/LoopPass.h"
>     +#include "llvm/Analysis/ScalarEvolution.h"
>     +#include "llvm/Analysis/ScalarEvolutionExpander.h"
>     +#include "llvm/Analysis/ScalarEvolutionExpressions.h"
>     +#include "llvm/Analysis/TargetLibraryInfo.h"
>     +#include "llvm/Analysis/ValueTracking.h"
>     +#include "llvm/IR/DataLayout.h"
>     +#include "llvm/IR/Dominators.h"
>     +#include "llvm/IR/IRBuilder.h"
>     +#include "llvm/IR/PatternMatch.h"
>     +#include "llvm/Transforms/Scalar.h"
>     +#include "llvm/Transforms/Utils/Local.h"
>     +#include "llvm/Support/Debug.h"
>     +#include "llvm/Support/raw_ostream.h"
>     +
>     +#include <algorithm>
>     +#include <array>
>     +
>     +using namespace llvm;
>     +
>     +static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
>     +  cl::Hidden, cl::init(false),
>     +  cl::desc("Disable generation of memcpy in loop idiom recognition"));
>     +
>     +static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
>     +  cl::Hidden, cl::init(false),
>     +  cl::desc("Disable generation of memmove in loop idiom recognition"));
>     +
>     +static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
>     +  cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
>     +  "check guarding the memmove."));
>     +
>     +static cl::opt<unsigned> CompileTimeMemSizeThreshold(
>     +  "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
>     +  cl::desc("Threshold (in bytes) to perform the transformation, if the "
>     +    "runtime loop count (mem transfer size) is known at compile-time."));
>     +
>     +static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
>     +  cl::Hidden, cl::init(true),
>     +  cl::desc("Only enable generating memmove in non-nested loops"));
>     +
>     +cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
>     +  cl::Hidden, cl::init(false),
>     +  cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
>     +
>     +static const char *HexagonVolatileMemcpyName
>     +  = "hexagon_memcpy_forward_vp4cp4n2";
>     +
>     +
>     +namespace llvm {
>     +  void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
>     +  Pass *createHexagonLoopIdiomPass();
>     +}
>     +
>     +namespace {
>     +  class HexagonLoopIdiomRecognize : public LoopPass {
>     +  public:
>     +    static char ID;
>     +    explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
>     +      initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
>     +    }
>     +    StringRef getPassName() const override {
>     +      return "Recognize Hexagon-specific loop idioms";
>     +    }
>     +
>     +   void getAnalysisUsage(AnalysisUsage &AU) const override {
>     +      AU.addRequired<LoopInfoWrapperPass>();
>     +      AU.addRequiredID(LoopSimplifyID);
>     +      AU.addRequiredID(LCSSAID);
>     +      AU.addRequired<AAResultsWrapperPass>();
>     +      AU.addPreserved<AAResultsWrapperPass>();
>     +      AU.addRequired<ScalarEvolutionWrapperPass>();
>     +      AU.addRequired<DominatorTreeWrapperPass>();
>     +      AU.addRequired<TargetLibraryInfoWrapperPass>();
>     +      AU.addPreserved<TargetLibraryInfoWrapperPass>();
>     +    }
>     +
>     +    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
>     +
>     +  private:
>     +    unsigned getStoreSizeInBytes(StoreInst *SI);
>     +    int getSCEVStride(const SCEVAddRecExpr *StoreEv);
>     +    bool isLegalStore(Loop *CurLoop, StoreInst *SI);
>     +    void collectStores(Loop *CurLoop, BasicBlock *BB,
>     +        SmallVectorImpl<StoreInst*> &Stores);
>     +    bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
>     +    bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
>     +    bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
>     +        SmallVectorImpl<BasicBlock*> &ExitBlocks);
>     +    bool runOnCountableLoop(Loop *L);
>     +
>     +    AliasAnalysis *AA;
>     +    const DataLayout *DL;
>     +    DominatorTree *DT;
>     +    LoopInfo *LF;
>     +    const TargetLibraryInfo *TLI;
>     +    ScalarEvolution *SE;
>     +    bool HasMemcpy, HasMemmove;
>     +  };
>     +}
>     +
>     +char HexagonLoopIdiomRecognize::ID = 0;
>     +
>     +INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
>     +    "Recognize Hexagon-specific loop idioms", false, false)
>     +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
>     +INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
>     +INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
>     +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
>     +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
>     +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
>     +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
>     +INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
>     +    "Recognize Hexagon-specific loop idioms", false, false)
>     +
>     +
>     +//===----------------------------------------------------------------------===//
>     +//
>     +//          Implementation of PolynomialMultiplyRecognize
>     +//
>     +//===----------------------------------------------------------------------===//
>     +
>     +namespace {
>     +  class PolynomialMultiplyRecognize {
>     +  public:
>     +    explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
>     +        const DominatorTree &dt, const TargetLibraryInfo &tli,
>     +        ScalarEvolution &se)
>     +      : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
>     +
>     +    bool recognize();
>     +  private:
>     +    typedef SetVector<Value*> ValueSeq;
>     +
>     +    Value *getCountIV(BasicBlock *BB);
>     +    bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
>     +    void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
>     +          ValueSeq &Late);
>     +    bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
>     +    bool commutesWithShift(Instruction *I);
>     +    bool highBitsAreZero(Value *V, unsigned IterCount);
>     +    bool keepsHighBitsZero(Value *V, unsigned IterCount);
>     +    bool isOperandShifted(Instruction *I, Value *Op);
>     +    bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
>     +          unsigned IterCount);
>     +    void cleanupLoopBody(BasicBlock *LoopB);
>     +
>     +    struct ParsedValues {
>     +      ParsedValues() : M(nullptr), P(nullptr), Q(nullptr), R(nullptr),
>     +          X(nullptr), Res(nullptr), IterCount(0), Left(false), Inv(false) {}
>     +      Value *M, *P, *Q, *R, *X;
>     +      Instruction *Res;
>     +      unsigned IterCount;
>     +      bool Left, Inv;
>     +    };
>     +
>     +    bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
>     +    bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
>     +    bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
>     +          Value *CIV, ParsedValues &PV, bool PreScan);
>     +    unsigned getInverseMxN(unsigned QP);
>     +    Value *generate(BasicBlock::iterator At, ParsedValues &PV);
>     +
>     +    Loop *CurLoop;
>     +    const DataLayout &DL;
>     +    const DominatorTree &DT;
>     +    const TargetLibraryInfo &TLI;
>     +    ScalarEvolution &SE;
>     +  };
>     +}
>     +
>     +
>     +Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
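>     +  // Look for a phi node in BB whose incoming value from the preheader is
>     +  // the constant 0 and whose incoming value from BB itself is that same
>     +  // phi plus 1; such a phi is the counting induction variable.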
>     +  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
>     +  if (std::distance(PI, PE) != 2)
>     +    return nullptr;
>     +  BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
>     +
>     +  for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
>     +    auto *PN = cast<PHINode>(I);
>     +    Value *InitV = PN->getIncomingValueForBlock(PB);
>     +    if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
>     +      continue;
>     +    Value *IterV = PN->getIncomingValueForBlock(BB);
>     +    if (!isa<BinaryOperator>(IterV))
>     +      continue;
>     +    auto *BO = dyn_cast<BinaryOperator>(IterV);
>     +    if (BO->getOpcode() != Instruction::Add)
>     +      continue;
>     +    Value *IncV = nullptr;
>     +    if (BO->getOperand(0) == PN)
>     +      IncV = BO->getOperand(1);
>     +    else if (BO->getOperand(1) == PN)
>     +      IncV = BO->getOperand(0);
>     +    if (IncV == nullptr)
>     +      continue;
>     +
>     +    if (auto *T = dyn_cast<ConstantInt>(IncV))
>     +      if (T->getZExtValue() == 1)
>     +        return PN;
>     +  }
>     +  return nullptr;
>     +}
>     +
>     +
>     +static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
>     +  for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
>     +    Use &TheUse = UI.getUse();
>     +    ++UI;
>     +    if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
>     +      if (BB == II->getParent())
>     +        II->replaceUsesOfWith(I, J);
>     +  }
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
>     +      Value *CIV, ParsedValues &PV) {
>     +  // Match the following:
>     +  //   select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
>     +  //   select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
>     +  // The condition may also check for equality with the masked value, i.e
>     +  //   select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
>     +  //   select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
>     +
>     +  Value *CondV = SelI->getCondition();
>     +  Value *TrueV = SelI->getTrueValue();
>     +  Value *FalseV = SelI->getFalseValue();
>     +
>     +  using namespace PatternMatch;
>     +
>     +  CmpInst::Predicate P;
>     +  Value *A = nullptr, *B = nullptr, *C = nullptr;
>     +
>     +  if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
>     +      !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
>     +    return false;
>     +  if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
>     +    return false;
>     +  // Matched: select (A & B) == C ? ... : ...
>     +  //          select (A & B) != C ? ... : ...
>     +
>     +  Value *X = nullptr, *Sh1 = nullptr;
>     +  // Check (A & B) for (X & (1 << i)):
>     +  if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
>     +    Sh1 = A;
>     +    X = B;
>     +  } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
>     +    Sh1 = B;
>     +    X = A;
>     +  } else {
>     +    // TODO: Could also check for an induction variable containing single
>     +    // bit shifted left by 1 in each iteration.
>     +    return false;
>     +  }
>     +
>     +  bool TrueIfZero;
>     +
>     +  // Check C against the possible values for comparison: 0 and (1 << i):
>     +  if (match(C, m_Zero()))
>     +    TrueIfZero = (P == CmpInst::ICMP_EQ);
>     +  else if (C == Sh1)
>     +    TrueIfZero = (P == CmpInst::ICMP_NE);
>     +  else
>     +    return false;
>     +
>     +  // So far, matched:
>     +  //   select (X & (1 << i)) ? ... : ...
>     +  // including variations of the check against zero/non-zero value.
>     +
>     +  Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
>     +  if (TrueIfZero) {
>     +    ShouldSameV = TrueV;
>     +    ShouldXoredV = FalseV;
>     +  } else {
>     +    ShouldSameV = FalseV;
>     +    ShouldXoredV = TrueV;
>     +  }
>     +
>     +  Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
>     +  Value *T = nullptr;
>     +  if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
>     +    // Matched: select +++ ? ... : Y ^ Z
>     +    //          select +++ ? Y ^ Z : ...
>     +    // where +++ denotes previously checked matches.
>     +    if (ShouldSameV == Y)
>     +      T = Z;
>     +    else if (ShouldSameV == Z)
>     +      T = Y;
>     +    else
>     +      return false;
>     +    R = ShouldSameV;
>     +    // Matched: select +++ ? R : R ^ T
>     +    //          select +++ ? R ^ T : R
>     +    // depending on TrueIfZero.
>     +
>     +  } else if (match(ShouldSameV, m_Zero())) {
>     +    // Matched: select +++ ? 0 : ...
>     +    //          select +++ ? ... : 0
>     +    if (!SelI->hasOneUse())
>     +      return false;
>     +    T = ShouldXoredV;
>     +    // Matched: select +++ ? 0 : T
>     +    //          select +++ ? T : 0
>     +
>     +    Value *U = *SelI->user_begin();
>     +    if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
>     +        !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
>     +      return false;
>     +    // Matched: xor (select +++ ? 0 : T), R
>     +    //          xor (select +++ ? T : 0), R
>     +  } else
>     +    return false;
>     +
>     +  // The xor input value T is isolated into its own match so that it could
>     +  // be checked against an induction variable containing a shifted bit
>     +  // (todo).
>     +  // For now, check against (Q << i).
>     +  if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
>     +      !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
>     +    return false;
>     +  // Matched: select +++ ? R : R ^ (Q << i)
>     +  //          select +++ ? R ^ (Q << i) : R
>     +
>     +  PV.X = X;
>     +  PV.Q = Q;
>     +  PV.R = R;
>     +  PV.Left = true;
>     +  return true;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
>     +      ParsedValues &PV) {
>     +  // Match the following:
>     +  //   select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
>     +  //   select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
>     +  // The condition may also check for equality with the masked value, i.e
>     +  //   select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
>     +  //   select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
>     +
>     +  Value *CondV = SelI->getCondition();
>     +  Value *TrueV = SelI->getTrueValue();
>     +  Value *FalseV = SelI->getFalseValue();
>     +
>     +  using namespace PatternMatch;
>     +
>     +  Value *C = nullptr;
>     +  CmpInst::Predicate P;
>     +  bool TrueIfZero;
>     +
>     +  if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
>     +      match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
>     +    if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
>     +      return false;
>     +    // Matched: select C == 0 ? ... : ...
>     +    //          select C != 0 ? ... : ...
>     +    TrueIfZero = (P == CmpInst::ICMP_EQ);
>     +  } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
>     +             match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
>     +    if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
>     +      return false;
>     +    // Matched: select C == 1 ? ... : ...
>     +    //          select C != 1 ? ... : ...
>     +    TrueIfZero = (P == CmpInst::ICMP_NE);
>     +  } else
>     +    return false;
>     +
>     +  Value *X = nullptr;
>     +  if (!match(C, m_And(m_Value(X), m_One())) &&
>     +      !match(C, m_And(m_One(), m_Value(X))))
>     +    return false;
>     +  // Matched: select (X & 1) == +++ ? ... : ...
>     +  //          select (X & 1) != +++ ? ... : ...
>     +
>     +  Value *R = nullptr, *Q = nullptr;
>     +  if (TrueIfZero) {
>     +    // The select's condition is true if the tested bit is 0.
>     +    // TrueV must be the shift, FalseV must be the xor.
>     +    if (!match(TrueV, m_LShr(m_Value(R), m_One())))
>     +      return false;
>     +    // Matched: select +++ ? (R >> 1) : ...
>     +    if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
>     +        !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
>     +      return false;
>     +    // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
>     +    // with commuting ^.
>     +  } else {
>     +    // The select's condition is true if the tested bit is 1.
>     +    // TrueV must be the xor, FalseV must be the shift.
>     +    if (!match(FalseV, m_LShr(m_Value(R), m_One())))
>     +      return false;
>     +    // Matched: select +++ ? ... : (R >> 1)
>     +    if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
>     +        !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
>     +      return false;
>     +    // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
>     +    // with commuting ^.
>     +  }
>     +
>     +  PV.X = X;
>     +  PV.Q = Q;
>     +  PV.R = R;
>     +  PV.Left = false;
>     +  return true;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
>     +      BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
>     +      bool PreScan) {
>     +  using namespace PatternMatch;
>     +
>     +  // The basic pattern for R = P.Q is:
>     +  // for i = 0..31
>     +  //   R = phi (0, R')
>     +  //   if (P & (1 << i))        ; test-bit(P, i)
>     +  //     R' = R ^ (Q << i)
>     +  //
>     +  // Similarly, the basic pattern for R = (P/Q).Q - P
>     +  // for i = 0..31
>     +  //   R = phi(P, R')
>     +  //   if (R & (1 << i))
>     +  //     R' = R ^ (Q << i)
>     +
>     +  // There exist idioms, where instead of Q being shifted left, P is shifted
>     +  // right. This produces a result that is shifted right by 32 bits (the
>     +  // non-shifted result is 64-bit).
>     +  //
>     +  // For R = P.Q, this would be:
>     +  // for i = 0..31
>     +  //   R = phi (0, R')
>     +  //   if ((P >> i) & 1)
>     +  //     R' = (R >> 1) ^ Q      ; R is cycled through the loop, so it must
>     +  //   else                     ; be shifted by 1, not i.
>     +  //     R' = R >> 1
>     +  //
>     +  // And for the inverse:
>     +  // for i = 0..31
>     +  //   R = phi (P, R')
>     +  //   if (R & 1)
>     +  //     R' = (R >> 1) ^ Q
>     +  //   else
>     +  //     R' = R >> 1
>     +
>     +  // The left-shifting idioms share the same pattern:
>     +  //   select (X & (1 << i)) ? R ^ (Q << i) : R
>     +  // Similarly for right-shifting idioms:
>     +  //   select (X & 1) ? (R >> 1) ^ Q
>     +
>     +  if (matchLeftShift(SelI, CIV, PV)) {
>     +    // If this is a pre-scan, getting this far is sufficient.
>     +    if (PreScan)
>     +      return true;
>     +
>     +    // Need to make sure that the SelI goes back into R.
>     +    auto *RPhi = dyn_cast<PHINode>(PV.R);
>     +    if (!RPhi)
>     +      return false;
>     +    if (SelI != RPhi->getIncomingValueForBlock(LoopB))
>     +      return false;
>     +    PV.Res = SelI;
>     +
>     +    // If X is loop invariant, it must be the input polynomial, and the
>     +    // idiom is the basic polynomial multiply.
>     +    if (CurLoop->isLoopInvariant(PV.X)) {
>     +      PV.P = PV.X;
>     +      PV.Inv = false;
>     +    } else {
>     +      // X is not loop invariant. If X == R, this is the inverse pmpy.
>     +      // Otherwise, check for an xor with an invariant value. If the
>     +      // variable argument to the xor is R, then this is still a valid
>     +      // inverse pmpy.
>     +      PV.Inv = true;
>     +      if (PV.X != PV.R) {
>     +        Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
>     +        if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
>     +          return false;
>     +        auto *I1 = dyn_cast<Instruction>(X1);
>     +        auto *I2 = dyn_cast<Instruction>(X2);
>     +        if (!I1 || I1->getParent() != LoopB) {
>     +          Var = X2;
>     +          Inv = X1;
>     +        } else if (!I2 || I2->getParent() != LoopB) {
>     +          Var = X1;
>     +          Inv = X2;
>     +        } else
>     +          return false;
>     +        if (Var != PV.R)
>     +          return false;
>     +        PV.M = Inv;
>     +      }
>     +      // The input polynomial P still needs to be determined. It will be
>     +      // the entry value of R.
>     +      Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
>     +      PV.P = EntryP;
>     +    }
>     +
>     +    return true;
>     +  }
>     +
>     +  if (matchRightShift(SelI, PV)) {
>     +    // If this is an inverse pattern, the Q polynomial must be known at
>     +    // compile time.
>     +    if (PV.Inv && !isa<ConstantInt>(PV.Q))
>     +      return false;
>     +    if (PreScan)
>     +      return true;
>     +    // There is no exact matching of right-shift pmpy.
>     +    return false;
>     +  }
>     +
>     +  return false;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
>     +      ValueSeq &Cycle) {
>     +  // Out = ..., In, ...
>     +  if (Out == In)
>     +    return true;
>     +
>     +  auto *BB = cast<Instruction>(Out)->getParent();
>     +  bool HadPhi = false;
>     +
>     +  for (auto U : Out->users()) {
>     +    auto *I = dyn_cast<Instruction>(&*U);
>     +    if (I == nullptr || I->getParent() != BB)
>     +      continue;
>     +    // Make sure that there are no multi-iteration cycles, e.g.
>     +    //   p1 = phi(p2)
>     +    //   p2 = phi(p1)
>     +    // The cycle p1->p2->p1 would span two loop iterations.
>     +    // Check that there is only one phi in the cycle.
>     +    bool IsPhi = isa<PHINode>(I);
>     +    if (IsPhi && HadPhi)
>     +      return false;
>     +    HadPhi |= IsPhi;
>     +    if (Cycle.count(I))
>     +      return false;
>     +    Cycle.insert(I);
>     +    if (findCycle(I, In, Cycle))
>     +      break;
>     +    Cycle.remove(I);
>     +  }
>     +  return !Cycle.empty();
>     +}
>     +
>     +
>     +void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
>     +      ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
>     +  // All the values in the cycle that are between the phi node and the
>     +  // divider instruction will be classified as "early", all other values
>     +  // will be "late".
>     +
>     +  bool IsE = true;
>     +  unsigned I, N = Cycle.size();
>     +  for (I = 0; I < N; ++I) {
>     +    Value *V = Cycle[I];
>     +    if (DivI == V)
>     +      IsE = false;
>     +    else if (!isa<PHINode>(V))
>     +      continue;
>     +    // Stop if found either.
>     +    break;
>     +  }
>     +  // "I" is the index of either DivI or the phi node, whichever was first.
>     +  // "E" is "false" or "true" respectively.
>     +  ValueSeq &First = !IsE ? Early : Late;
>     +  for (unsigned J = 0; J < I; ++J)
>     +    First.insert(Cycle[J]);
>     +
>     +  ValueSeq &Second = IsE ? Early : Late;
>     +  Second.insert(Cycle[I]);
>     +  for (++I; I < N; ++I) {
>     +    Value *V = Cycle[I];
>     +    if (DivI == V || isa<PHINode>(V))
>     +      break;
>     +    Second.insert(V);
>     +  }
>     +
>     +  for (; I < N; ++I)
>     +    First.insert(Cycle[I]);
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
>     +      ValueSeq &Early, ValueSeq &Late) {
>     +  // Select is an exception, since the condition value does not have to be
>     +  // classified in the same way as the true/false values. The true/false
>     +  // values do have to be both early or both late.
>     +  if (UseI->getOpcode() == Instruction::Select) {
>     +    Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
>     +    if (Early.count(TV) || Early.count(FV)) {
>     +      if (Late.count(TV) || Late.count(FV))
>     +        return false;
>     +      Early.insert(UseI);
>     +    } else if (Late.count(TV) || Late.count(FV)) {
>     +      if (Early.count(TV) || Early.count(FV))
>     +        return false;
>     +      Late.insert(UseI);
>     +    }
>     +    return true;
>     +  }
>     +
>     +  // Not sure what would be the example of this, but the code below relies
>     +  // on having at least one operand.
>     +  if (UseI->getNumOperands() == 0)
>     +    return true;
>     +
>     +  bool AE = true, AL = true;
>     +  for (auto &I : UseI->operands()) {
>     +    if (Early.count(&*I))
>     +      AL = false;
>     +    else if (Late.count(&*I))
>     +      AE = false;
>     +  }
>     +  // If the operands appear "all early" and "all late" at the same time,
>     +  // then it means that none of them are actually classified as either.
>     +  // This is harmless.
>     +  if (AE && AL)
>     +    return true;
>     +  // Conversely, if they are neither "all early" nor "all late", then
>     +  // we have a mixture of early and late operands that is not a known
>     +  // exception.
>     +  if (!AE && !AL)
>     +    return false;
>     +
>     +  // Check that we have covered the two special cases.
>     +  assert(AE != AL);
>     +
>     +  if (AE)
>     +    Early.insert(UseI);
>     +  else
>     +    Late.insert(UseI);
>     +  return true;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
>     +  switch (I->getOpcode()) {
>     +    case Instruction::And:
>     +    case Instruction::Or:
>     +    case Instruction::Xor:
>     +    case Instruction::LShr:
>     +    case Instruction::Shl:
>     +    case Instruction::Select:
>     +    case Instruction::ICmp:
>     +    case Instruction::PHI:
>     +      break;
>     +    default:
>     +      return false;
>     +  }
>     +  return true;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
>     +      unsigned IterCount) {
>     +  auto *T = dyn_cast<IntegerType>(V->getType());
>     +  if (!T)
>     +    return false;
>     +
>     +  unsigned BW = T->getBitWidth();
>     +  APInt K0(BW, 0), K1(BW, 0);
>     +  computeKnownBits(V, K0, K1, DL);
>     +  return K0.countLeadingOnes() >= IterCount;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
>     +      unsigned IterCount) {
>     +  // Assume that all inputs to the value have the high bits zero.
>     +  // Check if the value itself preserves the zeros in the high bits.
>     +  if (auto *C = dyn_cast<ConstantInt>(V))
>     +    return C->getValue().countLeadingZeros() >= IterCount;
>     +
>     +  if (auto *I = dyn_cast<Instruction>(V)) {
>     +    switch (I->getOpcode()) {
>     +      case Instruction::And:
>     +      case Instruction::Or:
>     +      case Instruction::Xor:
>     +      case Instruction::LShr:
>     +      case Instruction::Select:
>     +      case Instruction::ICmp:
>     +      case Instruction::PHI:
>     +        return true;
>     +    }
>     +  }
>     +
>     +  return false;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
>     +  unsigned Opc = I->getOpcode();
>     +  if (Opc == Instruction::Shl || Opc == Instruction::LShr)
>     +    return Op != I->getOperand(1);
>     +  return true;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
>     +      BasicBlock *ExitB, unsigned IterCount) {
>     +  Value *CIV = getCountIV(LoopB);
>     +  if (CIV == nullptr)
>     +    return false;
>     +  auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
>     +  if (CIVTy == nullptr)
>     +    return false;
>     +
>     +  ValueSeq RShifts;
>     +  ValueSeq Early, Late, Cycled;
>     +
>     +  // Find all value cycles that contain logical right shifts by 1.
>     +  for (Instruction &I : *LoopB) {
>     +    using namespace PatternMatch;
>     +    Value *V = nullptr;
>     +    if (!match(&I, m_LShr(m_Value(V), m_One())))
>     +      continue;
>     +    ValueSeq C;
>     +    if (!findCycle(&I, V, C))
>     +      continue;
>     +
>     +    // Found a cycle.
>     +    C.insert(&I);
>     +    classifyCycle(&I, C, Early, Late);
>     +    Cycled.insert(C.begin(), C.end());
>     +    RShifts.insert(&I);
>     +  }
>     +
>     +  // Find the set of all values affected by the shift cycles, i.e. all
>     +  // cycled values, and (recursively) all their users.
>     +  ValueSeq Users(Cycled.begin(), Cycled.end());
>     +  for (unsigned i = 0; i < Users.size(); ++i) {
>     +    Value *V = Users[i];
>     +    if (!isa<IntegerType>(V->getType()))
>     +      return false;
>     +    auto *R = cast<Instruction>(V);
>     +    // If the instruction does not commute with shifts, the loop cannot
>     +    // be unshifted.
>     +    if (!commutesWithShift(R))
>     +      return false;
>     +    for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
>     +      auto *T = cast<Instruction>(*I);
>     +      // Skip users from outside of the loop. They will be handled later.
>     +      // Also, skip the right-shifts and phi nodes, since they mix early
>     +      // and late values.
>     +      if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
>     +        continue;
>     +
>     +      Users.insert(T);
>     +      if (!classifyInst(T, Early, Late))
>     +        return false;
>     +    }
>     +  }
>     +
>     +  if (Users.size() == 0)
>     +    return false;
>     +
>     +  // Verify that high bits remain zero.
>     +  ValueSeq Internal(Users.begin(), Users.end());
>     +  ValueSeq Inputs;
>     +  for (unsigned i = 0; i < Internal.size(); ++i) {
>     +    auto *R = dyn_cast<Instruction>(Internal[i]);
>     +    if (!R)
>     +      continue;
>     +    for (Value *Op : R->operands()) {
>     +      auto *T = dyn_cast<Instruction>(Op);
>     +      if (T && T->getParent() != LoopB)
>     +        Inputs.insert(Op);
>     +      else
>     +        Internal.insert(Op);
>     +    }
>     +  }
>     +  for (Value *V : Inputs)
>     +    if (!highBitsAreZero(V, IterCount))
>     +      return false;
>     +  for (Value *V : Internal)
>     +    if (!keepsHighBitsZero(V, IterCount))
>     +      return false;
>     +
>     +  // Finally, the work can be done. Unshift each user.
>     +  IRBuilder<> IRB(LoopB);
>     +  std::map<Value*,Value*> ShiftMap;
>     +  typedef std::map<std::pair<Value*,Type*>,Value*> CastMapType;
>     +  CastMapType CastMap;
>     +
>     +  auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
>     +        IntegerType *Ty) -> Value* {
>     +    auto H = CM.find(std::make_pair(V, Ty));
>     +    if (H != CM.end())
>     +      return H->second;
>     +    Value *CV = IRB.CreateIntCast(V, Ty, false);
>     +    CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
>     +    return CV;
>     +  };
>     +
>     +  for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
>     +    if (isa<PHINode>(I) || !Users.count(&*I))
>     +      continue;
>     +    using namespace PatternMatch;
>     +    // Match lshr x, 1.
>     +    Value *V = nullptr;
>     +    if (match(&*I, m_LShr(m_Value(V), m_One()))) {
>     +      replaceAllUsesOfWithIn(&*I, V, LoopB);
>     +      continue;
>     +    }
>     +    // For each non-cycled operand, replace it with the corresponding
>     +    // value shifted left.
>     +    for (auto &J : I->operands()) {
>     +      Value *Op = J.get();
>     +      if (!isOperandShifted(&*I, Op))
>     +        continue;
>     +      if (Users.count(Op))
>     +        continue;
>     +      // Skip shifting zeros.
>     +      if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
>     +        continue;
>     +      // Check if we have already generated a shift for this value.
>     +      auto F = ShiftMap.find(Op);
>     +      Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
>     +      if (W == nullptr) {
>     +        IRB.SetInsertPoint(&*I);
>     +        // First, the shift amount will be CIV or CIV+1, depending on
>     +        // whether the value is early or late. Instead of creating CIV+1,
>     +        // do a single shift of the value.
>     +        Value *ShAmt = CIV, *ShVal = Op;
>     +        auto *VTy = cast<IntegerType>(ShVal->getType());
>     +        auto *ATy = cast<IntegerType>(ShAmt->getType());
>     +        if (Late.count(&*I))
>     +          ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
>     +        // Second, the types of the shifted value and the shift amount
>     +        // must match.
>     +        if (VTy != ATy) {
>     +          if (VTy->getBitWidth() < ATy->getBitWidth())
>     +            ShVal = upcast(CastMap, IRB, ShVal, ATy);
>     +          else
>     +            ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
>     +        }
>     +        // Ready to generate the shift and memoize it.
>     +        W = IRB.CreateShl(ShVal, ShAmt);
>     +        ShiftMap.insert(std::make_pair(Op, W));
>     +      }
>     +      I->replaceUsesOfWith(Op, W);
>     +    }
>     +  }
>     +
>     +  // Update the users outside of the loop to account for having left
>     +  // shifts. They would normally be shifted right in the loop, so shift
>     +  // them right after the loop exit.
>     +  // Take advantage of the loop-closed SSA form, which has all the post-
>     +  // loop values in phi nodes.
>     +  IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
>     +  for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
>     +    if (!isa<PHINode>(P))
>     +      break;
>     +    auto *PN = cast<PHINode>(P);
>     +    Value *U = PN->getIncomingValueForBlock(LoopB);
>     +    if (!Users.count(U))
>     +      continue;
>     +    Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
>     +    PN->replaceAllUsesWith(S);
>     +    // The above RAUW will create
>     +    //   S = lshr S, IterCount
>     +    // so we need to fix it back into
>     +    //   S = lshr PN, IterCount
>     +    cast<User>(S)->replaceUsesOfWith(S, PN);
>     +  }
>     +
>     +  return true;
>     +}
>     +
>     +
>     +void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
>     +  for (auto &I : *LoopB)
>     +    if (Value *SV = SimplifyInstruction(&I, DL, &TLI, &DT))
>     +      I.replaceAllUsesWith(SV);
>     +
>     +  for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
>     +    N = std::next(I);
>     +    RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
>     +  }
>     +}
>     +
>     +
>     +unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
>     +  // Arrays of coefficients of Q and the inverse, C.
>     +  // Q[i] = coefficient at x^i.
>     +  std::array<char,32> Q, C;
>     +
>     +  for (unsigned i = 0; i < 32; ++i) {
>     +    Q[i] = QP & 1;
>     +    QP >>= 1;
>     +  }
>     +  assert(Q[0] == 1);
>     +
>     +  // Find C, such that
>     +  // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
>     +  //
>     +  // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
>     +  // operations * and + are & and ^ respectively.
>     +  //
>     +  // Find C[i] recursively, by comparing i-th coefficient in the product
>     +  // with 0 (or 1 for i=0).
>     +  //
>     +  // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
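>     +  // For illustration: if Q = 1 + x + x^3 (QP ending in ...1011), the
>     +  // recurrence below gives C[0..3] = 1, 1, 1, 0, and the low coefficients
>     +  // of the product (1 + x + x^3)(1 + x + x^2 + ...) are 1, 0, 0, 0 as
>     +  // required.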
>     +  C[0] = 1;
>     +  for (unsigned i = 1; i < 32; ++i) {
>     +    // Solve for C[i] in:
>     +    //   C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
>     +    // This is equivalent to
>     +    //   C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
>     +    // which is
>     +    //   C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
>     +    unsigned T = 0;
>     +    for (unsigned j = 0; j < i; ++j)
>     +      T = T ^ (C[j] & Q[i-j]);
>     +    C[i] = T;
>     +  }
>     +
>     +  unsigned QV = 0;
>     +  for (unsigned i = 0; i < 32; ++i)
>     +    if (C[i])
>     +      QV |= (1 << i);
>     +
>     +  return QV;
>     +}
>     +
>     +
>     +Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
>     +      ParsedValues &PV) {
>     +  IRBuilder<> B(&*At);
>     +  Module *M = At->getParent()->getParent()->getParent();
>     +  Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
>     +
>     +  Value *P = PV.P, *Q = PV.Q, *P0 = P;
>     +  unsigned IC = PV.IterCount;
>     +
>     +  if (PV.M != nullptr)
>     +    P0 = P = B.CreateXor(P, PV.M);
>     +
>     +  // Create a bit mask to clear the high bits beyond IterCount.
>     +  auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
>     +
>     +  if (PV.IterCount != 32)
>     +    P = B.CreateAnd(P, BMI);
>     +
>     +  if (PV.Inv) {
>     +    auto *QI = dyn_cast<ConstantInt>(PV.Q);
>     +    assert(QI && QI->getBitWidth() <= 32);
>     +
>     +    // Again, clearing bits beyond IterCount.
>     +    unsigned M = (1 << PV.IterCount) - 1;
>     +    unsigned Tmp = (QI->getZExtValue() | 1) & M;
>     +    unsigned QV = getInverseMxN(Tmp) & M;
>     +    auto *QVI = ConstantInt::get(QI->getType(), QV);
>     +    P = B.CreateCall(PMF, {P, QVI});
>     +    P = B.CreateTrunc(P, QI->getType());
>     +    if (IC != 32)
>     +      P = B.CreateAnd(P, BMI);
>     +  }
>     +
>     +  Value *R = B.CreateCall(PMF, {P, Q});
>     +
>     +  if (PV.M != nullptr)
>     +    R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
>     +
>     +  return R;
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::recognize() {
>     +  // Restrictions:
>     +  // - The loop must consist of a single block.
>     +  // - The iteration count must be known at compile-time.
>     +  // - The loop must have an induction variable starting from 0, and
>     +  //   incremented in each iteration of the loop.
>     +  BasicBlock *LoopB = CurLoop->getHeader();
>     +  if (LoopB != CurLoop->getLoopLatch())
>     +    return false;
>     +  BasicBlock *ExitB = CurLoop->getExitBlock();
>     +  if (ExitB == nullptr)
>     +    return false;
>     +  BasicBlock *EntryB = CurLoop->getLoopPreheader();
>     +  if (EntryB == nullptr)
>     +    return false;
>     +
>     +  unsigned IterCount = 0;
>     +  const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
>     +  if (isa<SCEVCouldNotCompute>(CT))
>     +    return false;
>     +  if (auto *CV = dyn_cast<SCEVConstant>(CT))
>     +    IterCount = CV->getValue()->getZExtValue() + 1;
>     +
>     +  Value *CIV = getCountIV(LoopB);
>     +  ParsedValues PV;
>     +  PV.IterCount = IterCount;
>     +
>     +  // Test function to see if a given select instruction is a part of the
>     +  // pmpy pattern. The argument PreScan set to "true" indicates that only
>     +  // a preliminary scan is needed, "false" indicated an exact match.
>     +  auto CouldBePmpy = [this, LoopB, EntryB, CIV, &PV] (bool PreScan)
>     +      -> std::function<bool (Instruction &I)> {
>     +    return [this, LoopB, EntryB, CIV, &PV, PreScan] (Instruction &I) -> bool {
>     +      if (auto *SelI = dyn_cast<SelectInst>(&I))
>     +        return scanSelect(SelI, LoopB, EntryB, CIV, PV, PreScan);
>     +      return false;
>     +    };
>     +  };
>     +  auto PreF = std::find_if(LoopB->begin(), LoopB->end(), CouldBePmpy(true));
>     +  if (PreF == LoopB->end())
>     +    return false;
>     +
>     +  if (!PV.Left) {
>     +    convertShiftsToLeft(LoopB, ExitB, IterCount);
>     +    cleanupLoopBody(LoopB);
>     +  }
>     +
>     +  auto PostF = std::find_if(LoopB->begin(), LoopB->end(), CouldBePmpy(false));
>     +  if (PostF == LoopB->end())
>     +    return false;
>     +
>     +  DEBUG({
>     +    StringRef PP = (PV.M ? "(P+M)" : "P");
>     +    if (!PV.Inv)
>     +      dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";
>     +    else
>     +      dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "
>     +             << PP << "\n";
>     +    dbgs() << "  Res:" << *PV.Res << "\n  P:" << *PV.P << "\n";
>     +    if (PV.M)
>     +      dbgs() << "  M:" << *PV.M << "\n";
>     +    dbgs() << "  Q:" << *PV.Q << "\n";
>     +    dbgs() << "  Iteration count:" << PV.IterCount << "\n";
>     +  });
>     +
>     +  BasicBlock::iterator At(EntryB->getTerminator());
>     +  Value *PM = generate(At, PV);
>     +  if (PM == nullptr)
>     +    return false;
>     +
>     +  if (PM->getType() != PV.Res->getType())
>     +    PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
>     +
>     +  PV.Res->replaceAllUsesWith(PM);
>     +  PV.Res->eraseFromParent();
>     +  return true;
>     +}
>     +
>     +
>     +unsigned HexagonLoopIdiomRecognize::getStoreSizeInBytes(StoreInst *SI) {
>     +  uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
>     +  assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
>     +         "Don't overflow unsigned.");
>     +  return (unsigned)SizeInBits >> 3;
>     +}
>     +
>     +
>     +int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
>     +  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
>     +    return SC->getAPInt().getSExtValue();
>     +  return 0;
>     +}
>     +
>     +
>     +bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
>     +  bool IsVolatile = false;
>     +  if (SI->isVolatile() && HexagonVolatileMemcpy)
>     +    IsVolatile = true;
>     +  else if (!SI->isSimple())
>     +    return false;
>     +
>     +  Value *StoredVal = SI->getValueOperand();
>     +  Value *StorePtr = SI->getPointerOperand();
>     +
>     +  // Reject stores that are so large that they overflow an unsigned.
>     +  uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
>     +  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
>     +    return false;
>     +
>     +  // See if the pointer expression is an AddRec like {base,+,1} on the current
>     +  // loop, which indicates a strided store.  If we have something else, it's a
>     +  // random store we can't handle.
>     +  auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
>     +  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
>     +    return false;
>     +
>     +  // Check to see if the stride matches the size of the store.  If so, then we
>     +  // know that every byte is touched in the loop.
>     +  int Stride = getSCEVStride(StoreEv);
>     +  if (Stride == 0)
>     +    return false;
>     +  unsigned StoreSize = getStoreSizeInBytes(SI);
>     +  if (StoreSize != unsigned(std::abs(Stride)))
>     +    return false;
>     +
>     +  // The store must be feeding a non-volatile load.
>     +  LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
>     +  if (!LI || !LI->isSimple())
>     +    return false;
>     +
>     +  // See if the pointer expression is an AddRec like {base,+,1} on the current
>     +  // loop, which indicates a strided load.  If we have something else, it's a
>     +  // random load we can't handle.
>     +  Value *LoadPtr = LI->getPointerOperand();
>     +  auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
>     +  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
>     +    return false;
>     +
>     +  // The store and load must share the same stride.
>     +  if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
>     +    return false;
>     +
>     +  // Success.  This store can be converted into a memcpy.
>     +  return true;
>     +}
>     +
>     +
>     +/// mayLoopAccessLocation - Return true if the specified loop might access the
>     +/// specified pointer location, which is a loop-strided access.  The 'Access'
>     +/// argument specifies what the verboten forms of access are (read or write).
>     +static bool
>     +mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
>     +                      const SCEV *BECount, unsigned StoreSize,
>     +                      AliasAnalysis &AA,
>     +                      SmallPtrSetImpl<Instruction *> &Ignored) {
>     +  // Get the location that may be stored across the loop.  Since the access
>     +  // is strided positively through memory, we say that the modified location
>     +  // starts at the pointer and has infinite size.
>     +  uint64_t AccessSize = MemoryLocation::UnknownSize;
>     +
>     +  // If the loop iterates a fixed number of times, we can refine the access
>     +  // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
>     +  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
>     +    AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
>     +
>     +  // TODO: For this to be really effective, we have to dive into the pointer
>     +  // operand in the store.  Store to &A[i] of 100 will always return may alias
>     +  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
>     +  // which will then no-alias a store to &A[100].
>     +  MemoryLocation StoreLoc(Ptr, AccessSize);
>     +
>     +  for (auto *B : L->blocks())
>     +    for (auto &I : *B)
>     +      if (Ignored.count(&I) == 0 && (AA.getModRefInfo(&I, StoreLoc) & Access))
>     +        return true;
>     +
>     +  return false;
>     +}
>     +
>     +
>     +void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
>     +      SmallVectorImpl<StoreInst*> &Stores) {
>     +  Stores.clear();
>     +  for (Instruction &I : *BB)
>     +    if (StoreInst *SI = dyn_cast<StoreInst>(&I))
>     +      if (isLegalStore(CurLoop, SI))
>     +        Stores.push_back(SI);
>     +}
>     +
>     +
>     +bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
>     +      StoreInst *SI, const SCEV *BECount) {
>     +  assert(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy) &&
>     +             "Expected only non-volatile stores, or Hexagon-specific memcpy"
>     +             "to volatile destination.");
>     +
>     +  Value *StorePtr = SI->getPointerOperand();
>     +  auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
>     +  unsigned Stride = getSCEVStride(StoreEv);
>     +  unsigned StoreSize = getStoreSizeInBytes(SI);
>     +  if (Stride != StoreSize)
>     +    return false;
>     +
>     +  // See if the pointer expression is an AddRec like {base,+,1} on the current
>     +  // loop, which indicates a strided load.  If we have something else, it's a
>     +  // random load we can't handle.
>     +  LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
>     +  auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
>     +
>     +  // The trip count of the loop and the base pointer of the addrec SCEV is
>     +  // guaranteed to be loop invariant, which means that it should dominate the
>     +  // header.  This allows us to insert code for it in the preheader.
>     +  BasicBlock *Preheader = CurLoop->getLoopPreheader();
>     +  Instruction *ExpPt = Preheader->getTerminator();
>     +  IRBuilder<> Builder(ExpPt);
>     +  SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
>     +
>     +  Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
>     +
>     +  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
>     +  // this into a memcpy/memmove in the loop preheader now if we want.  However,
>     +  // this would be unsafe to do if there is anything else in the loop that may
>     +  // read or write the memory region we're storing to.  For memcpy, this
>     +  // includes the load that feeds the stores.  Check for an alias by generating
>     +  // the base address and checking everything.
>     +  Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
>     +      Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
>     +  Value *LoadBasePtr = nullptr;
>     +
>     +  bool Overlap = false;
>     +  bool DestVolatile = SI->isVolatile();
>     +  Type *BECountTy = BECount->getType();
>     +
>     +  if (DestVolatile) {
>     +    // The trip count must fit in i32, since it is the type of the "num_words"
>     +    // argument to hexagon_memcpy_forward_vp4cp4n2.
>     +    if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
>     +CleanupAndExit:
>     +      // If we generated new code for the base pointer, clean up.
>     +      Expander.clear();
>     +      if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
>     +        RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
>     +        StoreBasePtr = nullptr;
>     +      }
>     +      if (LoadBasePtr) {
>     +        RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
>     +        LoadBasePtr = nullptr;
>     +      }
>     +      return false;
>     +    }
>     +  }
>     +
>     +  SmallPtrSet<Instruction*, 2> Ignore1;
>     +  Ignore1.insert(SI);
>     +  if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
>     +                            StoreSize, *AA, Ignore1)) {
>     +    // Check if the load is the offending instruction.
>     +    Ignore1.insert(LI);
>     +    if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
>     +                              StoreSize, *AA, Ignore1)) {
>     +      // Still bad. Nothing we can do.
>     +      goto CleanupAndExit;
>     +    }
>     +    // It worked with the load ignored.
>     +    Overlap = true;
>     +  }
>     +
>     +  if (!Overlap) {
>     +    if (DisableMemcpyIdiom || !HasMemcpy)
>     +      goto CleanupAndExit;
>     +  } else {
>     +    // Don't generate memmove if this function will be inlined. This is
>     +    // because the caller will undergo this transformation after inlining.
>     +    Function *Func = CurLoop->getHeader()->getParent();
>     +    if (Func->hasFnAttribute(Attribute::AlwaysInline))
>     +      goto CleanupAndExit;
>     +
>     +    // In case of a memmove, the call to memmove will be executed instead
>     +    // of the loop, so we need to make sure that there is nothing else in
>     +    // the loop than the load, store and instructions that these two depend
>     +    // on.
>     +    SmallVector<Instruction*,2> Insts;
>     +    Insts.push_back(SI);
>     +    Insts.push_back(LI);
>     +    if (!coverLoop(CurLoop, Insts))
>     +      goto CleanupAndExit;
>     +
>     +    if (DisableMemmoveIdiom || !HasMemmove)
>     +      goto CleanupAndExit;
>     +    bool IsNested = CurLoop->getParentLoop() != 0;
>     +    if (IsNested && OnlyNonNestedMemmove)
>     +      goto CleanupAndExit;
>     +  }
>     +
>     +  // For a memcpy, we have to make sure that the input array is not being
>     +  // mutated by the loop.
>     +  LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
>     +      Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
>     +
>     +  SmallPtrSet<Instruction*, 2> Ignore2;
>     +  Ignore2.insert(SI);
>     +  if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
>     +                            *AA, Ignore2))
>     +    goto CleanupAndExit;
>     +
>     +  // Check the stride.
>     +  bool StridePos = getSCEVStride(LoadEv) >= 0;
>     +
>     +  // Currently, the volatile memcpy only emulates traversing memory forward.
>     +  if (!StridePos && DestVolatile)
>     +    goto CleanupAndExit;
>     +
>     +  bool RuntimeCheck = (Overlap || DestVolatile);
>     +
>     +  BasicBlock *ExitB;
>     +  if (RuntimeCheck) {
>     +    // The runtime check needs a single exit block.
>     +    SmallVector<BasicBlock*, 8> ExitBlocks;
>     +    CurLoop->getUniqueExitBlocks(ExitBlocks);
>     +    if (ExitBlocks.size() != 1)
>     +      goto CleanupAndExit;
>     +    ExitB = ExitBlocks[0];
>     +  }
>     +
>     +  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
>     +  // pointer size if it isn't already.
>     +  LLVMContext &Ctx = SI->getContext();
>     +  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
>     +  unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment());
>     +  DebugLoc DLoc = SI->getDebugLoc();
>     +
>     +  const SCEV *NumBytesS =
>     +      SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
>     +  if (StoreSize != 1)
>     +    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
>     +                               SCEV::FlagNUW);
>     +  Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
>     +  if (Instruction *In = dyn_cast<Instruction>(NumBytes))
>     +    if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
>     +      NumBytes = Simp;
>     +
>     +  CallInst *NewCall;
>     +
>     +  if (RuntimeCheck) {
>     +    unsigned Threshold = RuntimeMemSizeThreshold;
>     +    if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
>     +      uint64_t C = CI->getZExtValue();
>     +      if (Threshold != 0 && C < Threshold)
>     +        goto CleanupAndExit;
>     +      if (C < CompileTimeMemSizeThreshold)
>     +        goto CleanupAndExit;
>     +    }
>     +
>     +    BasicBlock *Header = CurLoop->getHeader();
>     +    Function *Func = Header->getParent();
>     +    Loop *ParentL = LF->getLoopFor(Preheader);
>     +    StringRef HeaderName = Header->getName();
>     +
>     +    // Create a new (empty) preheader, and update the PHI nodes in the
>     +    // header to use the new preheader.
>     +    BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
>     +                                                  Func, Header);
>     +    if (ParentL)
>     +      ParentL->addBasicBlockToLoop(NewPreheader, *LF);
>     +    IRBuilder<>(NewPreheader).CreateBr(Header);
>     +    for (auto &In : *Header) {
>     +      PHINode *PN = dyn_cast<PHINode>(&In);
>     +      if (!PN)
>     +        break;
>     +      int bx = PN->getBasicBlockIndex(Preheader);
>     +      if (bx >= 0)
>     +        PN->setIncomingBlock(bx, NewPreheader);
>     +    }
>     +    DT->addNewBlock(NewPreheader, Preheader);
>     +    DT->changeImmediateDominator(Header, NewPreheader);
>     +
>     +    // Check for safe conditions to execute memmove.
>     +    // If the stride is positive, copying things from higher to lower
>     +    // addresses is equivalent to memmove.  For a negative stride, it's
>     +    // the other way around.  Copying forward in memory with a positive
>     +    // stride may not be the same as memmove, since we may be copying
>     +    // values that we just stored in some previous iteration.
>     +    Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
>     +    Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
>     +    Value *LowA = StridePos ? SA : LA;
>     +    Value *HighA = StridePos ? LA : SA;
>     +    Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
>     +    Value *Cond = CmpA;
>     +
>     +    // Check for distance between pointers.
>     +    Value *Dist = Builder.CreateSub(HighA, LowA);
>     +    Value *CmpD = Builder.CreateICmpSLT(NumBytes, Dist);
>     +    Value *CmpEither = Builder.CreateOr(Cond, CmpD);
>     +    Cond = CmpEither;
>     +
>     +    if (Threshold != 0) {
>     +      Type *Ty = NumBytes->getType();
>     +      Value *Thr = ConstantInt::get(Ty, Threshold);
>     +      Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
>     +      Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
>     +      Cond = CmpBoth;
>     +    }
>     +    BasicBlock *MemmoveB = BasicBlock::Create(Ctx,
>     Header->getName()+".rtli",
>     +                                              Func, NewPreheader);
>     +    if (ParentL)
>     +      ParentL->addBasicBlockToLoop(MemmoveB, *LF);
>     +    Instruction *OldT = Preheader->getTerminator();
>     +    Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
>     +    OldT->eraseFromParent();
>     +    Preheader->setName(Preheader->getName()+".old");
>     +    DT->addNewBlock(MemmoveB, Preheader);
>     +    // Find the new immediate dominator of the exit block.
>     +    BasicBlock *ExitD = Preheader;
>     +    for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI !=
>     PE; ++PI) {
>     +      BasicBlock *PB = *PI;
>     +      ExitD = DT->findNearestCommonDominator(ExitD, PB);
>     +      if (!ExitD)
>     +        break;
>     +    }
>     +    // If the prior immediate dominator of ExitB was dominated by the
>     +    // old preheader, then the old preheader becomes the new immediate
>     +    // dominator.  Otherwise don't change anything (because the newly
>     +    // added blocks are dominated by the old preheader).
>     +    if (ExitD && DT->dominates(Preheader, ExitD)) {
>     +      DomTreeNode *BN = DT->getNode(ExitB);
>     +      DomTreeNode *DN = DT->getNode(ExitD);
>     +      BN->setIDom(DN);
>     +    }
>     +
>     +    // Add a call to memmove to the conditional block.
>     +    IRBuilder<> CondBuilder(MemmoveB);
>     +    CondBuilder.CreateBr(ExitB);
>     +    CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
>     +
>     +    if (DestVolatile) {
>     +      Type *Int32Ty = Type::getInt32Ty(Ctx);
>     +      Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
>     +      Type *VoidTy = Type::getVoidTy(Ctx);
>     +      Module *M = Func->getParent();
>     +      Constant *CF =
>     M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
>     +                                            Int32PtrTy, Int32PtrTy,
>     Int32Ty,
>     +                                            nullptr);
>     +      Function *Fn = cast<Function>(CF);
>     +      Fn->setLinkage(Function::ExternalLinkage);
>     +
>     +      const SCEV *OneS = SE->getConstant(Int32Ty, 1);
>     +      const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount,
>     Int32Ty);
>     +      const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS,
>     SCEV::FlagNUW);
>     +      Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
>     +
>      MemmoveB->getTerminator());
>     +      if (Instruction *In = dyn_cast<Instruction>(NumWords))
>     +        if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
>     +          NumWords = Simp;
>     +
>     +      Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
>     +                      ? StoreBasePtr
>     +                      : CondBuilder.CreateBitCast(StoreBasePtr,
>     Int32PtrTy);
>     +      Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
>     +                      ? LoadBasePtr
>     +                      : CondBuilder.CreateBitCast(LoadBasePtr,
>     Int32PtrTy);
>     +      NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
>     +    } else {
>     +      NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr,
>     +                                          NumBytes, Alignment);
>     +    }
>     +  } else {
>     +    NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr,
>     +                                   NumBytes, Alignment);
>     +    // Okay, the memcpy has been formed.  Zap the original store and
>     +    // anything that feeds into it.
>     +    RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
>     +  }
>     +
>     +  NewCall->setDebugLoc(DLoc);
>     +
>     +  DEBUG(dbgs() << "  Formed " << (Overlap ? "memmove: " : "memcpy: ")
>     +               << *NewCall << "\n"
>     +               << "    from load ptr=" << *LoadEv << " at: " << *LI
>     << "\n"
>     +               << "    from store ptr=" << *StoreEv << " at: " <<
>     *SI << "\n");
>     +
>     +  return true;
>     +}
>     +
>     +
>     +// \brief Check if the instructions in Insts, together with their
>     +// dependencies, cover the loop in the sense that the loop could be
>     +// safely eliminated once the instructions in Insts are removed.
>     +bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
>     +      SmallVectorImpl<Instruction*> &Insts) const {
>     +  SmallSet<BasicBlock*,8> LoopBlocks;
>     +  for (auto *B : L->blocks())
>     +    LoopBlocks.insert(B);
>     +
>     +  SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
>     +
>     +  // Collect all instructions from the loop that the instructions
>     in Insts
>     +  // depend on (plus their dependencies, etc.).  These instructions
>     will
>     +  // constitute the expression trees that feed those in Insts, but
>     the trees
>     +  // will be limited only to instructions contained in the loop.
>     +  for (unsigned i = 0; i < Worklist.size(); ++i) {
>     +    Instruction *In = Worklist[i];
>     +    for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
>     +      Instruction *OpI = dyn_cast<Instruction>(I);
>     +      if (!OpI)
>     +        continue;
>     +      BasicBlock *PB = OpI->getParent();
>     +      if (!LoopBlocks.count(PB))
>     +        continue;
>     +      Worklist.insert(OpI);
>     +    }
>     +  }
>     +
>     +  // Scan all instructions in the loop; if any of them have a user
>     +  // outside of the loop, or outside of the expressions collected above,
>     +  // then either the loop has a side effect visible outside of it, or
>     +  // there are instructions in it that are not involved in the original
>     +  // set Insts.
>     +  for (auto *B : L->blocks()) {
>     +    for (auto &In : *B) {
>     +      if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
>     +        continue;
>     +      if (!Worklist.count(&In) && In.mayHaveSideEffects())
>     +        return false;
>     +      for (const auto &K : In.users()) {
>     +        Instruction *UseI = dyn_cast<Instruction>(K);
>     +        if (!UseI)
>     +          continue;
>     +        BasicBlock *UseB = UseI->getParent();
>     +        if (LF->getLoopFor(UseB) != L)
>     +          return false;
>     +      }
>     +    }
>     +  }
>     +
>     +  return true;
>     +}
>     +
>     +/// runOnLoopBlock - Process the specified block, which lives in a
>     counted loop
>     +/// with the specified backedge count.  This block is known to be
>     in the current
>     +/// loop and not in any subloops.
>     +bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop,
>     BasicBlock *BB,
>     +      const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
>     +  // We can only promote stores in this block if they are
>     unconditionally
>     +  // executed in the loop.  For a block to be unconditionally
>     executed, it has
>     +  // to dominate all the exit blocks of the loop.  Verify this now.
>     +  auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
>     +    return DT->dominates(BB, EB);
>     +  };
>     +  if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
>     DominatedByBB))
>     +    return false;
>     +
>     +  bool MadeChange = false;
>     +  // Look for store instructions, which may be optimized to
>     memset/memcpy.
>     +  SmallVector<StoreInst*,8> Stores;
>     +  collectStores(CurLoop, BB, Stores);
>     +
>     +  // Optimize the store into a memcpy, if it is fed by a similarly
>     +  // strided load.
>     +  for (auto &SI : Stores)
>     +    MadeChange |= processCopyingStore(CurLoop, SI, BECount);
>     +
>     +  return MadeChange;
>     +}
>     +
>     +
>     +bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
>     +  PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
>     +  if (PMR.recognize())
>     +    return true;
>     +
>     +  if (!HasMemcpy && !HasMemmove)
>     +    return false;
>     +
>     +  const SCEV *BECount = SE->getBackedgeTakenCount(L);
>     +  assert(!isa<SCEVCouldNotCompute>(BECount) &&
>     +         "runOnCountableLoop() called on a loop without a predictable"
>     +         "backedge-taken count");
>     +
>     +  SmallVector<BasicBlock *, 8> ExitBlocks;
>     +  L->getUniqueExitBlocks(ExitBlocks);
>     +
>     +  bool Changed = false;
>     +
>     +  // Scan all the blocks in the loop that are not in subloops.
>     +  for (auto *BB : L->getBlocks()) {
>     +    // Ignore blocks in subloops.
>     +    if (LF->getLoopFor(BB) != L)
>     +      continue;
>     +    Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
>     +  }
>     +
>     +  return Changed;
>     +}
>     +
>     +
>     +bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager
>     &LPM) {
>     +  const Module &M = *L->getHeader()->getParent()->getParent();
>     +  if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
>     +    return false;
>     +
>     +  if (skipLoop(L))
>     +    return false;
>     +
>     +  // If the loop could not be converted to canonical form, it must
>     +  // have an indirectbr in it; just give up.
>     +  if (!L->getLoopPreheader())
>     +    return false;
>     +
>     +  // Disable loop idiom recognition if the function's name is a common
>     +  // idiom: replacing the loop with a mem* call inside memset/memcpy/
>     +  // memmove itself would just call back into the function being compiled.
>     +  StringRef Name = L->getHeader()->getParent()->getName();
>     +  if (Name == "memset" || Name == "memcpy" || Name == "memmove")
>     +    return false;
>     +
>     +  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
>     +  DL = &L->getHeader()->getModule()->getDataLayout();
>     +  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
>     +  LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
>     +  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
>     +  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
>     +
>     +  HasMemcpy = TLI->has(LibFunc_memcpy);
>     +  HasMemmove = TLI->has(LibFunc_memmove);
>     +
>     +  if (SE->hasLoopInvariantBackedgeTakenCount(L))
>     +    return runOnCountableLoop(L);
>     +  return false;
>     +}
>     +
>     +
>     +Pass *llvm::createHexagonLoopIdiomPass() {
>     +  return new HexagonLoopIdiomRecognize();
>     +}
>     +
>
>     Modified: llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.cpp
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.cpp?rev=293213&r1=293212&r2=293213&view=diff
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.cpp?rev=293213&r1=293212&r2=293213&view=diff>
>     ==============================================================================
>     --- llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.cpp (original)
>     +++ llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.cpp Thu Jan
>     26 15:41:10 2017
>     @@ -24,6 +24,7 @@
>      #include "llvm/Support/CommandLine.h"
>      #include "llvm/Support/TargetRegistry.h"
>      #include "llvm/Transforms/Scalar.h"
>     +#include "llvm/Transforms/IPO/PassManagerBuilder.h"
>
>      using namespace llvm;
>
>     @@ -98,11 +99,6 @@ static cl::opt<bool> EnableVectorPrint("
>      extern "C" int HexagonTargetMachineModule;
>      int HexagonTargetMachineModule = 0;
>
>     -extern "C" void LLVMInitializeHexagonTarget() {
>     -  // Register the target.
>     -  RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
>     -}
>     -
>      static ScheduleDAGInstrs
>     *createVLIWMachineSched(MachineSchedContext *C) {
>        return new VLIWMachineScheduler(C,
>     make_unique<ConvergingVLIWScheduler>());
>      }
>     @@ -114,6 +110,8 @@ SchedCustomRegistry("hexagon", "Run Hexa
>      namespace llvm {
>        extern char &HexagonExpandCondsetsID;
>        void initializeHexagonExpandCondsetsPass(PassRegistry&);
>     +  void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
>     +  Pass *createHexagonLoopIdiomPass();
>
>        FunctionPass *createHexagonBitSimplify();
>        FunctionPass *createHexagonBranchRelaxation();
>     @@ -150,6 +148,12 @@ static Reloc::Model getEffectiveRelocMod
>        return *RM;
>      }
>
>     +extern "C" void LLVMInitializeHexagonTarget() {
>     +  // Register the target.
>     +  RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
>     +
>     initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
>     +}
>     +
>      HexagonTargetMachine::HexagonTargetMachine(const Target &T, const
>     Triple &TT,
>                                                 StringRef CPU, StringRef FS,
>                                                 const TargetOptions
>     &Options,
>     @@ -196,6 +200,14 @@ HexagonTargetMachine::getSubtargetImpl(c
>        return I.get();
>      }
>
>     +void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) {
>     +  PMB.addExtension(
>     +    PassManagerBuilder::EP_LateLoopOptimizations,
>     +    [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
>     +      PM.add(createHexagonLoopIdiomPass());
>     +    });
>     +}
>     +
>      TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() {
>        return TargetIRAnalysis([this](const Function &F) {
>          return TargetTransformInfo(HexagonTTIImpl(this, F));
>
>     Modified: llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.h
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.h?rev=293213&r1=293212&r2=293213&view=diff
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.h?rev=293213&r1=293212&r2=293213&view=diff>
>     ==============================================================================
>     --- llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.h (original)
>     +++ llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.h Thu Jan 26
>     15:41:10 2017
>     @@ -37,6 +37,7 @@ public:
>
>        static unsigned getModuleMatchQuality(const Module &M);
>
>     +  void adjustPassManager(PassManagerBuilder &PMB) override;
>        TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
>        TargetIRAnalysis getTargetIRAnalysis() override;
>
>
>     Added: llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove1.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove1.ll?rev=293213&view=auto
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove1.ll?rev=293213&view=auto>
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove1.ll
>     (added)
>     +++ llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove1.ll
>     Thu Jan 26 15:41:10 2017
>     @@ -0,0 +1,36 @@
>     +; Check for recognizing the "memmove" idiom.
>     +; RUN: opt -basicaa -hexagon-loop-idiom -S -mtriple
>     hexagon-unknown-elf < %s \
>     +; RUN:  | FileCheck %s
>     +; CHECK: call void @llvm.memmove
>     +
>     +; Function Attrs: norecurse nounwind
>     +define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32
>     %n) #0 {
>     +entry:
>     +  %cmp1 = icmp sgt i32 %n, 0
>     +  br i1 %cmp1, label %for.body.preheader, label %for.end
>     +
>     +for.body.preheader:                               ; preds = %entry
>     +  %arrayidx.gep = getelementptr i32, i32* %B, i32 0
>     +  %arrayidx1.gep = getelementptr i32, i32* %A, i32 0
>     +  br label %for.body
>     +
>     +for.body:                                         ; preds =
>     %for.body.preheader, %for.body
>     +  %arrayidx.phi = phi i32* [ %arrayidx.gep, %for.body.preheader ],
>     [ %arrayidx.inc, %for.body ]
>     +  %arrayidx1.phi = phi i32* [ %arrayidx1.gep, %for.body.preheader
>     ], [ %arrayidx1.inc, %for.body ]
>     +  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
>     +  %0 = load i32, i32* %arrayidx.phi, align 4
>     +  store i32 %0, i32* %arrayidx1.phi, align 4
>     +  %inc = add nuw nsw i32 %i.02, 1
>     +  %exitcond = icmp ne i32 %inc, %n
>     +  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
>     +  %arrayidx1.inc = getelementptr i32, i32* %arrayidx1.phi, i32 1
>     +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>     +
>     +for.end.loopexit:                                 ; preds = %for.body
>     +  br label %for.end
>     +
>     +for.end:                                          ; preds =
>     %for.end.loopexit, %entry
>     +  ret void
>     +}
>     +
>     +attributes #0 = { nounwind }
>
>     Added: llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll?rev=293213&view=auto
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll?rev=293213&view=auto>
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll
>     (added)
>     +++ llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll
>     Thu Jan 26 15:41:10 2017
>     @@ -0,0 +1,36 @@
>     +; RUN: opt -basicaa -hexagon-loop-idiom -S -mtriple
>     hexagon-unknown-elf < %s \
>     +; RUN:  | FileCheck %s
>     +
>     +define void @PR14241(i32* %s, i64 %size) #0 {
>     +; Ensure that we don't form a memcpy for strided loops. Briefly,
>     when we taught
>     +; LoopIdiom about memmove and strided loops, this got miscompiled
>     into a memcpy
>     +; instead of a memmove. If we get the memmove transform back, this
>     will catch
>     +; regressions.
>     +;
>     +; CHECK-LABEL: @PR14241(
>     +
>     +entry:
>     +  %end.idx = add i64 %size, -1
>     +  %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
>     +  br label %while.body
>     +; CHECK-NOT: memcpy
>     +; CHECK: memmove
>     +
>     +while.body:
>     +  %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
>     +  %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
>     +  %val = load i32, i32* %src.ptr, align 4
>     +; CHECK: load
>     +  %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
>     +  store i32 %val, i32* %dst.ptr, align 4
>     +; CHECK: store
>     +  %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
>     +  %cmp = icmp eq i32* %next.ptr, %end.ptr
>     +  br i1 %cmp, label %exit, label %while.body
>     +
>     +exit:
>     +  ret void
>     +; CHECK: ret void
>     +}
>     +
>     +attributes #0 = { nounwind }
>
>     Added: llvm/trunk/test/CodeGen/Hexagon/loop-idiom/lcssa.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/lcssa.ll?rev=293213&view=auto
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/lcssa.ll?rev=293213&view=auto>
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/Hexagon/loop-idiom/lcssa.ll (added)
>     +++ llvm/trunk/test/CodeGen/Hexagon/loop-idiom/lcssa.ll Thu Jan 26
>     15:41:10 2017
>     @@ -0,0 +1,46 @@
>     +; RUN: opt -hexagon-loop-idiom -loop-deletion -gvn -S < %s
>     +; REQUIRES: asserts
>     +
>     +; This tests that the HexagonLoopIdiom pass does not mark LCSSA
>     +; information as preserved. The pass calls SimplifyInstruction in a
>     +; couple of places, which can invalidate LCSSA. Specifically, the uses
>     +; of an LCSSA phi variable are replaced by the incoming value.
>     +
>     +define hidden void @test() local_unnamed_addr #0 {
>     +entry:
>     +  br label %if.then63
>     +
>     +if.then63:
>     +  br i1 undef, label %do.body311, label %if.end375
>     +
>     +do.body311:
>     +  br i1 undef, label %do.end318, label %do.body311
>     +
>     +do.end318:
>     +  br i1 undef, label %if.end322, label %if.end375
>     +
>     +if.end322:
>     +  %sub325 = sub i32 undef, undef
>     +  br i1 undef, label %do.end329, label %do.body311
>     +
>     +do.end329:
>     +  %sub325.lcssa = phi i32 [ %sub325, %if.end322 ]
>     +  br label %do.body330
>     +
>     +do.body330:
>     +  %row_width.7 = phi i32 [ %sub325.lcssa, %do.end329 ], [ %dec334,
>     %do.body330 ]
>     +  %sp.5 = phi i8* [ undef, %do.end329 ], [ %incdec.ptr331,
>     %do.body330 ]
>     +  %dp.addr.5 = phi i8* [ undef, %do.end329 ], [ %incdec.ptr332,
>     %do.body330 ]
>     +  %0 = load i8, i8* %sp.5, align 1
>     +  store i8 %0, i8* %dp.addr.5, align 1
>     +  %incdec.ptr332 = getelementptr inbounds i8, i8* %dp.addr.5, i32 1
>     +  %incdec.ptr331 = getelementptr inbounds i8, i8* %sp.5, i32 1
>     +  %dec334 = add i32 %row_width.7, -1
>     +  %cmp335 = icmp eq i32 %dec334, 0
>     +  br i1 %cmp335, label %if.end375, label %do.body330
>     +
>     +if.end375:
>     +  ret void
>     +}
>     +
>     +attributes #0 = { nounwind }
>
>     Added: llvm/trunk/test/CodeGen/Hexagon/loop-idiom/nullptr-crash.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/nullptr-crash.ll?rev=293213&view=auto
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/nullptr-crash.ll?rev=293213&view=auto>
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/Hexagon/loop-idiom/nullptr-crash.ll (added)
>     +++ llvm/trunk/test/CodeGen/Hexagon/loop-idiom/nullptr-crash.ll Thu
>     Jan 26 15:41:10 2017
>     @@ -0,0 +1,24 @@
>     +; RUN: opt -basicaa -hexagon-loop-idiom -mtriple
>     hexagon-unknown-elf < %s
>     +; REQUIRES: asserts
>     +
>     +target triple = "hexagon"
>     +
>     +; Function Attrs: nounwind
>     +define void @fred(i8 zeroext %L) #0 {
>     +entry:
>     +  br i1 undef, label %if.end53, label %while.body37
>     +
>     +while.body37:                                     ; preds =
>     %while.body37, %entry
>     +  %i.121 = phi i32 [ %inc46, %while.body37 ], [ 0, %entry ]
>     +  %shl = shl i32 1, %i.121
>     +  %and39 = and i32 %shl, undef
>     +  %tobool40 = icmp eq i32 %and39, 0
>     +  %inc46 = add nuw nsw i32 %i.121, 1
>     +  %storemerge = select i1 %tobool40, i8 %L, i8 0
>     +  br i1 undef, label %while.body37, label %if.end53
>     +
>     +if.end53:                                         ; preds =
>     %while.body37, %entry
>     +  ret void
>     +}
>     +
>     +attributes #0 = { nounwind }
>
>     Added: llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy.ll?rev=293213&view=auto
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy.ll?rev=293213&view=auto>
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy.ll (added)
>     +++ llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy.ll Thu Jan 26
>     15:41:10 2017
>     @@ -0,0 +1,33 @@
>     +; RUN: opt -hexagon-loop-idiom < %s
>     -mtriple=hexagon-unknown-unknown -S \
>     +; RUN:  | FileCheck %s
>     +
>     +target triple = "hexagon"
>     +
>     +; CHECK: define i64 @basic_pmpy
>     +; CHECK: llvm.hexagon.M4.pmpyw
>     +define i64 @basic_pmpy(i32 %P, i32 %Q) #0 {
>     +entry:
>     +  %conv = zext i32 %Q to i64
>     +  br label %for.body
>     +
>     +for.body:                                         ; preds = %entry,
>     %for.body
>     +  %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
>     +  %R.06 = phi i64 [ 0, %entry ], [ %xor.R.06, %for.body ]
>     +  %shl = shl i32 1, %i.07
>     +  %and = and i32 %shl, %P
>     +  %tobool = icmp eq i32 %and, 0
>     +  %sh_prom = zext i32 %i.07 to i64
>     +  %shl1 = shl i64 %conv, %sh_prom
>     +  %xor = xor i64 %shl1, %R.06
>     +  %xor.R.06 = select i1 %tobool, i64 %R.06, i64 %xor
>     +  %inc = add nuw nsw i32 %i.07, 1
>     +  %exitcond = icmp ne i32 %inc, 32
>     +  br i1 %exitcond, label %for.body, label %for.end
>     +
>     +for.end:                                          ; preds = %for.body
>     +  %R.1.lcssa = phi i64 [ %xor.R.06, %for.body ]
>     +  ret i64 %R.1.lcssa
>     +}
>     +
>     +attributes #0 = { nounwind }
>     +
>
>
>     _______________________________________________
>     llvm-commits mailing list
>     llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>     <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
>
>
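
A side note for anyone reading the runtime-check portion of processCopyingStore
above: the SCEV/IRBuilder calls build (BECount+1)*StoreSize and then a guard
that decides whether to take the memmove (or volatile-memcpy) fast path or run
the original loop. The standalone C++ sketch below restates that arithmetic on
plain 32-bit integers (Hexagon pointers are 32-bit); it is illustrative only,
not part of the patch, and the function and parameter names are made up.

    #include <cstdint>

    // Corresponds to the SCEV expression (BECount + 1) * StoreSize.
    uint32_t numBytesCopied(uint32_t BECount, uint32_t StoreSize) {
      return (BECount + 1) * StoreSize;
    }

    // Mirrors the guard emitted in front of the new preheader:
    //   Cond = (LowA <u HighA) | (NumBytes <s HighA - LowA)
    //   if (Threshold != 0) Cond &= (Threshold <u NumBytes)
    // StridePos says whether the load/store stride is non-negative.
    bool takeMemmoveFastPath(uint32_t LoadAddr, uint32_t StoreAddr,
                             uint32_t NumBytes, bool StridePos,
                             uint32_t Threshold) {
      uint32_t LowA  = StridePos ? StoreAddr : LoadAddr;
      uint32_t HighA = StridePos ? LoadAddr  : StoreAddr;
      bool Cond = (LowA < HighA) ||
                  (static_cast<int32_t>(NumBytes) <
                   static_cast<int32_t>(HighA - LowA));
      if (Threshold != 0)
        Cond = Cond && (Threshold < NumBytes);
      // true  -> branch to the block that calls memmove (or the volatile
      //          memcpy helper) and skip the loop entirely;
      // false -> fall through to the new preheader and run the loop.
      return Cond;
    }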

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, 
hosted by The Linux Foundation

