[llvm] r298282 - [Hexagon] Recognize polynomial-modulo loop idiom again

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 20 15:05:02 PDT 2017


Sure. Checking...

-Krzysztof

On 3/20/2017 4:58 PM, Vitaly Buka wrote:
> Could you please take a look?
> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/3574/steps/check-llvm%20asan/logs/stdio
>
> Direct leak of 136 byte(s) in 1 object(s) allocated from: #0 0x98a100 in
> operator new(unsigned long)
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/projects/compiler-rt/lib/asan/asan_new_delete.cc:82
> #1 0x3d355e5 in allocateFixedOperandUser
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/User.cpp:128:7
> #2 0x3d355e5 in llvm::User::operator new(unsigned long, unsigned int)
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/User.cpp:146
> #3 0x3c3422a in Create
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/include/llvm/IR/Instructions.h:1962:23
> #4 0x3c3422a in llvm::SelectInst::cloneImpl() const
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/Instructions.cpp:3929
> #5 0x3c01f42 in llvm::Instruction::clone() const
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/include/llvm/IR/Instruction.def:187:1
> #6 0x1921b77 in initialize
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp:239:28
> #7 0x1921b77 in Context
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp:154
> #8 0x1921b77 in (anonymous
> namespace)::PolynomialMultiplyRecognize::recognize()
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp:1678
> #9 0x191e5b0 in runOnCountableLoop
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp:2234:11
> #10 0x191e5b0 in (anonymous
> namespace)::HexagonLoopIdiomRecognize::runOnLoop(llvm::Loop*,
> llvm::LPPassManager&)
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp:2291
> #11 0x2dcf8bc in llvm::LPPassManager::runOnFunction(llvm::Function&)
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/Analysis/LoopPass.cpp:203:23
> #12 0x3c7b90d in llvm::FPPassManager::runOnFunction(llvm::Function&)
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/LegacyPassManager.cpp:1513:27
> #13 0x3c7beb2 in llvm::FPPassManager::runOnModule(llvm::Module&)
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/LegacyPassManager.cpp:1534:16
> #14 0x3c7cd4a in runOnModule
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/LegacyPassManager.cpp:1590:27
> #15 0x3c7cd4a in llvm::legacy::PassManagerImpl::run(llvm::Module&)
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/LegacyPassManager.cpp:1693
> #16 0x9b4feb in main
> /mnt/b/sanitizer-buildbot3/sanitizer-x86_64-linux-fast/build/llvm/tools/opt/opt.cpp:722:10
> #17 0x7f9b7820482f in __libc_start_main
> (/lib/x86_64-linux-gnu/libc.so.6+0x2082f)
>
>
> On Mon, Mar 20, 2017 at 11:25 AM Krzysztof Parzyszek via llvm-commits
> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
>
>     Author: kparzysz
>     Date: Mon Mar 20 13:12:58 2017
>     New Revision: 298282
>
>     URL: http://llvm.org/viewvc/llvm-project?rev=298282&view=rev
>     Log:
>     [Hexagon] Recognize polynomial-modulo loop idiom again
>
>     Regain the ability to recognize loops calculating polynomial modulo
>     operation. This ability has been lost due to some changes in the
>     preceding optimizations. Add code to preprocess the IR to a form
>     that the pattern matching code can recognize.
>
>     Added:
>         llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll
>     Modified:
>         llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
>
>     Modified: llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp?rev=298282&r1=298281&r2=298282&view=diff
>     ==============================================================================
>     --- llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
>     (original)
>     +++ llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
>     Mon Mar 20 13:12:58 2017
>     @@ -129,6 +129,342 @@ INITIALIZE_PASS_END(HexagonLoopIdiomReco
>          "Recognize Hexagon-specific loop idioms", false, false)
>
>
>     +namespace {
>     +  struct Simplifier {
>     +    typedef std::function<Value* (Instruction*, LLVMContext&)> Rule;
>     +
>     +    void addRule(const Rule &R) { Rules.push_back(R); }
>     +
>     +  private:
>     +    typedef std::deque<Value*> WorkListType;
>     +    typedef std::set<Value*> ValueSetType;
>     +    std::vector<Rule> Rules;
>     +
>     +  public:
>     +    struct Context {
>     +      typedef DenseMap<Value*,Value*> ValueMapType;
>     +
>     +      Value *Root;
>     +      ValueSetType Used;
>     +      ValueMapType Clones, Orig;
>     +      LLVMContext &Ctx;
>     +
>     +      Context(Instruction *Exp)
>     +        : Ctx(Exp->getParent()->getParent()->getContext()) {
>     +        initialize(Exp);
>     +        reset();
>     +      }
>     +      ~Context() { cleanup(); }
>     +      void print(raw_ostream &OS, const Value *V) const;
>     +
>     +      Value *materialize(BasicBlock *B, BasicBlock::iterator At);
>     +
>     +    private:
>     +      void initialize(Instruction *Exp);
>     +      void reset();
>     +      void cleanup();
>     +      void cleanup(Value *V);
>     +
>     +      bool equal(const Instruction *I, const Instruction *J) const;
>     +      Value *find(Value *Tree, Value *Sub) const;
>     +      Value *subst(Value *Tree, Value *OldV, Value *NewV);
>     +      void replace(Value *OldV, Value *NewV);
>     +      void link(Instruction *I, BasicBlock *B, BasicBlock::iterator
>     At);
>     +
>     +      friend struct Simplifier;
>     +    };
>     +
>     +    Value *simplify(Context &C);
>     +  };
>     +
>     +  struct PE {
>     +    PE(const Simplifier::Context &c, Value *v = nullptr) : C(c),
>     V(v) {}
>     +    const Simplifier::Context &C;
>     +    const Value *V;
>     +  };
>     +
>     +  raw_ostream &operator<< (raw_ostream &OS, const PE &P)
>     LLVM_ATTRIBUTE_USED;
>     +  raw_ostream &operator<< (raw_ostream &OS, const PE &P) {
>     +    P.C.print(OS, P.V ? P.V : P.C.Root);
>     +    return OS;
>     +  }
>     +}
>     +
>     +
>     +void Simplifier::Context::print(raw_ostream &OS, const Value *V)
>     const {
>     +  const auto *U = dyn_cast<const Instruction>(V);
>     +  if (!U) {
>     +    OS << V << '(' << *V << ')';
>     +    return;
>     +  }
>     +
>     +  if (U->getParent()) {
>     +    OS << U << '(';
>     +    U->printAsOperand(OS, true);
>     +    OS << ')';
>     +    return;
>     +  }
>     +
>     +  unsigned N = U->getNumOperands();
>     +  if (N != 0)
>     +    OS << U << '(';
>     +  OS << U->getOpcodeName();
>     +  for (const Value *Op : U->operands()) {
>     +    OS << ' ';
>     +    print(OS, Op);
>     +  }
>     +  if (N != 0)
>     +    OS << ')';
>     +}
>     +
>     +
>     +void Simplifier::Context::initialize(Instruction *Exp) {
>     +  // Perform a deep clone of the expression, set Root to the root
>     +  // of the clone, and build a map from the cloned values to the
>     +  // original ones.
>     +  BasicBlock *Block = Exp->getParent();
>     +  WorkListType Q;
>     +  Q.push_back(Exp);
>     +
>     +  while (!Q.empty()) {
>     +    Value *V = Q.front();
>     +    Q.pop_front();
>     +    if (Clones.find(V) != Clones.end())
>     +      continue;
>     +    if (Instruction *U = dyn_cast<Instruction>(V)) {
>     +      if (isa<PHINode>(U) || U->getParent() != Block)
>     +        continue;
>     +      for (Value *Op : U->operands())
>     +        Q.push_back(Op);
>     +      Clones.insert({U, U->clone()});
>     +    }
>     +  }
>     +
>     +  for (std::pair<Value*,Value*> P : Clones) {
>     +    Instruction *U = cast<Instruction>(P.second);
>     +    for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
>     +      auto F = Clones.find(U->getOperand(i));
>     +      if (F != Clones.end())
>     +        U->setOperand(i, F->second);
>     +    }
>     +    Orig.insert({P.second, P.first});
>     +  }
>     +
>     +  auto R = Clones.find(Exp);
>     +  assert(R != Clones.end());
>     +  Root = R->second;
>     +}
>     +
>     +
>     +void Simplifier::Context::reset() {
>     +  ValueSetType NewUsed;
>     +  WorkListType Q;
>     +  Q.push_back(Root);
>     +
>     +  while (!Q.empty()) {
>     +    Instruction *U = dyn_cast<Instruction>(Q.front());
>     +    Q.pop_front();
>     +    if (!U || U->getParent())
>     +      continue;
>     +    NewUsed.insert(U);
>     +    for (Value *Op : U->operands())
>     +      Q.push_back(Op);
>     +  }
>     +  for (Value *V : Used)
>     +    if (!NewUsed.count(V))
>     +      cast<Instruction>(V)->dropAllReferences();
>     +  Used = NewUsed;
>     +}
>     +
>     +
>     +Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value
>     *NewV) {
>     +  if (Tree == OldV) {
>     +    cleanup(OldV);
>     +    return NewV;
>     +  }
>     +
>     +  WorkListType Q;
>     +  Q.push_back(Tree);
>     +  while (!Q.empty()) {
>     +    Instruction *U = dyn_cast<Instruction>(Q.front());
>     +    Q.pop_front();
>     +    // If U is not an instruction, or it's not a clone, skip it.
>     +    if (!U || U->getParent())
>     +      continue;
>     +    for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
>     +      Value *Op = U->getOperand(i);
>     +      if (Op == OldV) {
>     +        cleanup(OldV);
>     +        U->setOperand(i, NewV);
>     +      } else {
>     +        Q.push_back(Op);
>     +      }
>     +    }
>     +  }
>     +  return Tree;
>     +}
>     +
>     +
>     +void Simplifier::Context::replace(Value *OldV, Value *NewV) {
>     +  if (Root == OldV) {
>     +    Root = NewV;
>     +    reset();
>     +    return;
>     +  }
>     +
>     +  // NewV may be a complex tree that has just been created by one
>     of the
>     +  // transformation rules. We need to make sure that it is commoned
>     with
>     +  // the existing Root to the maximum extent possible.
>     +  // Identify all subtrees of NewV (including NewV itself) that have
>     +  // equivalent counterparts in Root, and replace those subtrees with
>     +  // these counterparts.
>     +  WorkListType Q;
>     +  Q.push_back(NewV);
>     +  while (!Q.empty()) {
>     +    Value *V = Q.front();
>     +    Q.pop_front();
>     +    Instruction *U = dyn_cast<Instruction>(V);
>     +    if (!U || U->getParent())
>     +      continue;
>     +    if (Value *DupV = find(Root, V)) {
>     +      if (DupV != V)
>     +        NewV = subst(NewV, V, DupV);
>     +    } else {
>     +      for (Value *Op : U->operands())
>     +        Q.push_back(Op);
>     +    }
>     +  }
>     +
>     +  // Now, simply replace OldV with NewV in Root.
>     +  Root = subst(Root, OldV, NewV);
>     +  reset();
>     +}
>     +
>     +
>     +void Simplifier::Context::cleanup() {
>     +  for (Value *V : Used) {
>     +    Instruction *U = cast<Instruction>(V);
>     +    if (!U->getParent())
>     +      U->dropAllReferences();
>     +  }
>     +}
>     +
>     +
>     +void Simplifier::Context::cleanup(Value *V) {
>     +  if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() !=
>     nullptr)
>     +    return;
>     +  WorkListType Q;
>     +  Q.push_back(V);
>     +  while (!Q.empty()) {
>     +    Instruction *U = dyn_cast<Instruction>(Q.front());
>     +    Q.pop_front();
>     +    if (!U || U->getParent() || Used.count(U))
>     +      continue;
>     +    for (Value *Op : U->operands())
>     +      Q.push_back(Op);
>     +    U->dropAllReferences();
>     +  }
>     +}
>     +
>     +
>     +bool Simplifier::Context::equal(const Instruction *I,
>     +                                const Instruction *J) const {
>     +  if (I == J)
>     +    return true;
>     +  if (!I->isSameOperationAs(J))
>     +    return false;
>     +  if (isa<PHINode>(I))
>     +    return I->isIdenticalTo(J);
>     +
>     +  for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
>     +    Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
>     +    if (OpI == OpJ)
>     +      continue;
>     +    auto *InI = dyn_cast<const Instruction>(OpI);
>     +    auto *InJ = dyn_cast<const Instruction>(OpJ);
>     +    if (InI && InJ) {
>     +      if (!equal(InI, InJ))
>     +        return false;
>     +    } else if (InI != InJ || !InI)
>     +      return false;
>     +  }
>     +  return true;
>     +}
>     +
>     +
>     +Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
>     +  Instruction *SubI = dyn_cast<Instruction>(Sub);
>     +  WorkListType Q;
>     +  Q.push_back(Tree);
>     +
>     +  while (!Q.empty()) {
>     +    Value *V = Q.front();
>     +    Q.pop_front();
>     +    if (V == Sub)
>     +      return V;
>     +    Instruction *U = dyn_cast<Instruction>(V);
>     +    if (!U || U->getParent())
>     +      continue;
>     +    if (SubI && equal(SubI, U))
>     +      return U;
>     +    assert(!isa<PHINode>(U));
>     +    for (Value *Op : U->operands())
>     +      Q.push_back(Op);
>     +  }
>     +  return nullptr;
>     +}
>     +
>     +
>     +void Simplifier::Context::link(Instruction *I, BasicBlock *B,
>     +      BasicBlock::iterator At) {
>     +  if (I->getParent())
>     +    return;
>     +
>     +  for (Value *Op : I->operands()) {
>     +    if (Instruction *OpI = dyn_cast<Instruction>(Op))
>     +      link(OpI, B, At);
>     +  }
>     +
>     +  B->getInstList().insert(At, I);
>     +}
>     +
>     +
>     +Value *Simplifier::Context::materialize(BasicBlock *B,
>     +      BasicBlock::iterator At) {
>     +  if (Instruction *RootI = dyn_cast<Instruction>(Root))
>     +    link(RootI, B, At);
>     +  return Root;
>     +}
>     +
>     +
>     +Value *Simplifier::simplify(Context &C) {
>     +  WorkListType Q;
>     +  Q.push_back(C.Root);
>     +
>     +  while (!Q.empty()) {
>     +    Instruction *U = dyn_cast<Instruction>(Q.front());
>     +    Q.pop_front();
>     +    if (!U || U->getParent() || !C.Used.count(U))
>     +      continue;
>     +    bool Changed = false;
>     +    for (Rule &R : Rules) {
>     +      Value *W = R(U, C.Ctx);
>     +      if (!W)
>     +        continue;
>     +      Changed = true;
>     +      C.replace(U, W);
>     +      Q.push_back(C.Root);
>     +      break;
>     +    }
>     +    if (!Changed) {
>     +      for (Value *Op : U->operands())
>     +        Q.push_back(Op);
>     +    }
>     +  }
>     +  return C.Root;
>     +}
>     +
>     +
>      //===----------------------------------------------------------------------===//
>      //
>      //          Implementation of PolynomialMultiplyRecognize
>     @@ -147,6 +483,14 @@ namespace {
>        private:
>          typedef SetVector<Value*> ValueSeq;
>
>     +    IntegerType *getPmpyType() const {
>     +      LLVMContext &Ctx =
>     CurLoop->getHeader()->getParent()->getContext();
>     +      return IntegerType::get(Ctx, 32);
>     +    }
>     +    bool isPromotableTo(Value *V, IntegerType *Ty);
>     +    void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock
>     *LoopB);
>     +    bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
>     +
>          Value *getCountIV(BasicBlock *BB);
>          bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
>          void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq
>     &Early,
>     @@ -176,6 +520,9 @@ namespace {
>          unsigned getInverseMxN(unsigned QP);
>          Value *generate(BasicBlock::iterator At, ParsedValues &PV);
>
>     +    void setupSimplifier();
>     +
>     +    Simplifier Simp;
>          Loop *CurLoop;
>          const DataLayout &DL;
>          const DominatorTree &DT;
>     @@ -425,7 +772,6 @@ bool PolynomialMultiplyRecognize::scanSe
>            BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV,
>     ParsedValues &PV,
>            bool PreScan) {
>        using namespace PatternMatch;
>     -
>        // The basic pattern for R = P.Q is:
>        // for i = 0..31
>        //   R = phi (0, R')
>     @@ -529,6 +875,150 @@ bool PolynomialMultiplyRecognize::scanSe
>      }
>
>
>     +bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
>     +      IntegerType *DestTy) {
>     +  IntegerType *T = dyn_cast<IntegerType>(Val->getType());
>     +  if (!T || T->getBitWidth() > DestTy->getBitWidth())
>     +    return false;
>     +  if (T->getBitWidth() == DestTy->getBitWidth())
>     +    return true;
>     +  // Non-instructions are promotable. The reason why an instruction
>     may not
>     +  // be promotable is that it may produce a different result if its
>     operands
>     +  // and the result are promoted, for example, it may produce more
>     non-zero
>     +  // bits. While it would still be possible to represent the proper
>     result
>     +  // in a wider type, it may require adding additional instructions
>     (which
>     +  // we don't want to do).
>     +  Instruction *In = dyn_cast<Instruction>(Val);
>     +  if (!In)
>     +    return true;
>     +  // The bitwidth of the source type is smaller than the destination.
>     +  // Check if the individual operation can be promoted.
>     +  switch (In->getOpcode()) {
>     +    case Instruction::PHI:
>     +    case Instruction::ZExt:
>     +    case Instruction::And:
>     +    case Instruction::Or:
>     +    case Instruction::Xor:
>     +    case Instruction::LShr: // Shift right is ok.
>     +    case Instruction::Select:
>     +      return true;
>     +    case Instruction::ICmp:
>     +      if (CmpInst *CI = cast<CmpInst>(In))
>     +        return CI->isEquality() || CI->isUnsigned();
>     +      llvm_unreachable("Cast failed unexpectedly");
>     +    case Instruction::Add:
>     +      return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
>     +  }
>     +  return false;
>     +}
>     +
>     +
>     +void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
>     +      IntegerType *DestTy, BasicBlock *LoopB) {
>     +  // Leave boolean values alone.
>     +  if (!In->getType()->isIntegerTy(1))
>     +    In->mutateType(DestTy);
>     +  unsigned DestBW = DestTy->getBitWidth();
>     +
>     +  // Handle PHIs.
>     +  if (PHINode *P = dyn_cast<PHINode>(In)) {
>     +    unsigned N = P->getNumIncomingValues();
>     +    for (unsigned i = 0; i != N; ++i) {
>     +      BasicBlock *InB = P->getIncomingBlock(i);
>     +      if (InB == LoopB)
>     +        continue;
>     +      Value *InV = P->getIncomingValue(i);
>     +      IntegerType *Ty = cast<IntegerType>(InV->getType());
>     +      // Do not promote values in PHI nodes of type i1.
>     +      if (Ty != P->getType()) {
>     +        // If the value type does not match the PHI type, the PHI type
>     +        // must have been promoted.
>     +        assert(Ty->getBitWidth() < DestBW);
>     +        InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV,
>     DestTy);
>     +        P->setIncomingValue(i, InV);
>     +      }
>     +    }
>     +  } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
>     +    Value *Op = Z->getOperand(0);
>     +    if (Op->getType() == Z->getType())
>     +      Z->replaceAllUsesWith(Op);
>     +    Z->eraseFromParent();
>     +    return;
>     +  }
>     +
>     +  // Promote immediates.
>     +  for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
>     +    if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
>     +      if (CI->getType()->getBitWidth() < DestBW)
>     +        In->setOperand(i, ConstantInt::get(DestTy,
>     CI->getZExtValue()));
>     +  }
>     +}
>     +
>     +
>     +bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
>     +      BasicBlock *ExitB) {
>     +  assert(LoopB);
>     +  // Skip loops where the exit block has more than one predecessor.
>     The values
>     +  // coming from the loop block will be promoted to another type,
>     and so the
>     +  // values coming into the exit block from other predecessors
>     would also have
>     +  // to be promoted.
>     +  if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
>     +    return false;
>     +  IntegerType *DestTy = getPmpyType();
>     +  // Check if the exit values have types that are no wider than the
>     type
>     +  // that we want to promote to.
>     +  unsigned DestBW = DestTy->getBitWidth();
>     +  for (Instruction &In : *ExitB) {
>     +    PHINode *P = dyn_cast<PHINode>(&In);
>     +    if (!P)
>     +      break;
>     +    if (P->getNumIncomingValues() != 1)
>     +      return false;
>     +    assert(P->getIncomingBlock(0) == LoopB);
>     +    IntegerType *T = dyn_cast<IntegerType>(P->getType());
>     +    if (!T || T->getBitWidth() > DestBW)
>     +      return false;
>     +  }
>     +
>     +  // Check all instructions in the loop.
>     +  for (Instruction &In : *LoopB)
>     +    if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
>     +      return false;
>     +
>     +  // Perform the promotion.
>     +  std::vector<Instruction*> LoopIns;
>     +  std::transform(LoopB->begin(), LoopB->end(),
>     std::back_inserter(LoopIns),
>     +                 [](Instruction &In) { return &In; });
>     +  for (Instruction *In : LoopIns)
>     +    promoteTo(In, DestTy, LoopB);
>     +
>     +  // Fix up the PHI nodes in the exit block.
>     +  Instruction *EndI = ExitB->getFirstNonPHI();
>     +  BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
>     +  for (auto I = ExitB->begin(); I != End; ++I) {
>     +    PHINode *P = dyn_cast<PHINode>(I);
>     +    if (!P)
>     +      break;
>     +    Type *Ty0 = P->getIncomingValue(0)->getType();
>     +    Type *PTy = P->getType();
>     +    if (PTy != Ty0) {
>     +      assert(Ty0 == DestTy);
>     +      // In order to create the trunc, P must have the promoted type.
>     +      P->mutateType(Ty0);
>     +      Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
>     +      // In order for the RAUW to work, the types of P and T must
>     match.
>     +      P->mutateType(PTy);
>     +      P->replaceAllUsesWith(T);
>     +      // Final update of the P's type.
>     +      P->mutateType(Ty0);
>     +      cast<Instruction>(T)->setOperand(0, P);
>     +    }
>     +  }
>     +
>     +  return true;
>     +}
>     +
>     +
>      bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
>            ValueSeq &Cycle) {
>        // Out = ..., In, ...
>     @@ -699,6 +1189,7 @@ bool PolynomialMultiplyRecognize::keepsH
>            case Instruction::Select:
>            case Instruction::ICmp:
>            case Instruction::PHI:
>     +      case Instruction::ZExt:
>              return true;
>          }
>        }
>     @@ -985,13 +1476,170 @@ Value *PolynomialMultiplyRecognize::gene
>      }
>
>
>     +void PolynomialMultiplyRecognize::setupSimplifier() {
>     +  Simp.addRule(
>     +    // Sink zext past bitwise operations.
>     +    [](Instruction *I, LLVMContext &Ctx) -> Value* {
>     +      if (I->getOpcode() != Instruction::ZExt)
>     +        return nullptr;
>     +      Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
>     +      if (!T)
>     +        return nullptr;
>     +      switch (T->getOpcode()) {
>     +        case Instruction::And:
>     +        case Instruction::Or:
>     +        case Instruction::Xor:
>     +          break;
>     +        default:
>     +          return nullptr;
>     +      }
>     +      IRBuilder<> B(Ctx);
>     +      return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
>     +                           B.CreateZExt(T->getOperand(0),
>     I->getType()),
>     +                           B.CreateZExt(T->getOperand(1),
>     I->getType()));
>     +    });
>     +  Simp.addRule(
>     +    // (xor (and x a) (and y a)) -> (and (xor x y) a)
>     +    [](Instruction *I, LLVMContext &Ctx) -> Value* {
>     +      if (I->getOpcode() != Instruction::Xor)
>     +        return nullptr;
>     +      Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
>     +      Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
>     +      if (!And0 || !And1)
>     +        return nullptr;
>     +      if (And0->getOpcode() != Instruction::And ||
>     +          And1->getOpcode() != Instruction::And)
>     +        return nullptr;
>     +      if (And0->getOperand(1) != And1->getOperand(1))
>     +        return nullptr;
>     +      IRBuilder<> B(Ctx);
>     +      return B.CreateAnd(B.CreateXor(And0->getOperand(0),
>     And1->getOperand(0)),
>     +                         And0->getOperand(1));
>     +    });
>     +  Simp.addRule(
>     +    // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
>     +    // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
>     +    [](Instruction *I, LLVMContext &Ctx) -> Value* {
>     +      BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
>     +      if (!BO)
>     +        return nullptr;
>     +      Instruction::BinaryOps Op = BO->getOpcode();
>     +      if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
>     +        IRBuilder<> B(Ctx);
>     +        Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
>     +        Value *Z = BO->getOperand(1);
>     +        return B.CreateSelect(Sel->getCondition(),
>     +                              B.CreateBinOp(Op, X, Z),
>     +                              B.CreateBinOp(Op, Y, Z));
>     +      }
>     +      if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
>     +        IRBuilder<> B(Ctx);
>     +        Value *X = BO->getOperand(0);
>     +        Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
>     +        return B.CreateSelect(Sel->getCondition(),
>     +                              B.CreateBinOp(Op, X, Y),
>     +                              B.CreateBinOp(Op, X, Z));
>     +      }
>     +      return nullptr;
>     +    });
>     +  Simp.addRule(
>     +    // (select c (select c x y) z) -> (select c x z)
>     +    // (select c x (select c y z)) -> (select c x z)
>     +    [](Instruction *I, LLVMContext &Ctx) -> Value* {
>     +      SelectInst *Sel = dyn_cast<SelectInst>(I);
>     +      if (!Sel)
>     +        return nullptr;
>     +      IRBuilder<> B(Ctx);
>     +      Value *C = Sel->getCondition();
>     +      if (SelectInst *Sel0 =
>     dyn_cast<SelectInst>(Sel->getTrueValue())) {
>     +        if (Sel0->getCondition() == C)
>     +          return B.CreateSelect(C, Sel0->getTrueValue(),
>     Sel->getFalseValue());
>     +      }
>     +      if (SelectInst *Sel1 =
>     dyn_cast<SelectInst>(Sel->getFalseValue())) {
>     +        if (Sel1->getCondition() == C)
>     +          return B.CreateSelect(C, Sel->getTrueValue(),
>     Sel1->getFalseValue());
>     +      }
>     +      return nullptr;
>     +    });
>     +  Simp.addRule(
>     +    // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
>     +    [](Instruction *I, LLVMContext &Ctx) -> Value* {
>     +      if (I->getOpcode() != Instruction::Or)
>     +        return nullptr;
>     +      Instruction *LShr = dyn_cast<Instruction>(I->getOperand(0));
>     +      if (!LShr || LShr->getOpcode() != Instruction::LShr)
>     +        return nullptr;
>     +      ConstantInt *One = dyn_cast<ConstantInt>(LShr->getOperand(1));
>     +      if (!One || One->getZExtValue() != 1)
>     +        return nullptr;
>     +      ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
>     +      if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
>     +        return nullptr;
>     +      return IRBuilder<>(Ctx).CreateXor(LShr, Msb);
>     +    });
>     +  Simp.addRule(
>     +    // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
>     +    [](Instruction *I, LLVMContext &Ctx) -> Value* {
>     +      if (I->getOpcode() != Instruction::LShr)
>     +        return nullptr;
>     +      BinaryOperator *BitOp =
>     dyn_cast<BinaryOperator>(I->getOperand(0));
>     +      if (!BitOp)
>     +        return nullptr;
>     +      switch (BitOp->getOpcode()) {
>     +        case Instruction::And:
>     +        case Instruction::Or:
>     +        case Instruction::Xor:
>     +          break;
>     +        default:
>     +          return nullptr;
>     +      }
>     +      IRBuilder<> B(Ctx);
>     +      Value *S = I->getOperand(1);
>     +      return B.CreateBinOp(BitOp->getOpcode(),
>     +                B.CreateLShr(BitOp->getOperand(0), S),
>     +                B.CreateLShr(BitOp->getOperand(1), S));
>     +    });
>     +  Simp.addRule(
>     +    // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
>     +    [](Instruction *I, LLVMContext &Ctx) -> Value* {
>     +      auto IsBitOp = [](unsigned Op) -> bool {
>     +        switch (Op) {
>     +          case Instruction::And:
>     +          case Instruction::Or:
>     +          case Instruction::Xor:
>     +            return true;
>     +        }
>     +        return false;
>     +      };
>     +      BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
>     +      if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
>     +        return nullptr;
>     +      BinaryOperator *BitOp2 =
>     dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
>     +      if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
>     +        return nullptr;
>     +      ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
>     +      ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
>     +      if (!CA || !CB)
>     +        return nullptr;
>     +      IRBuilder<> B(Ctx);
>     +      Value *X = BitOp2->getOperand(0);
>     +      return B.CreateBinOp(BitOp2->getOpcode(), X,
>     +                B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
>     +    });
>     +}
>     +
>     +
>      bool PolynomialMultiplyRecognize::recognize() {
>     +  DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
>     +               << *CurLoop << '\n');
>        // Restrictions:
>        // - The loop must consist of a single block.
>        // - The iteration count must be known at compile-time.
>        // - The loop must have an induction variable starting from 0, and
>        //   incremented in each iteration of the loop.
>        BasicBlock *LoopB = CurLoop->getHeader();
>     +  DEBUG(dbgs() << "Loop header:\n" << *LoopB);
>     +
>        if (LoopB != CurLoop->getLoopLatch())
>          return false;
>        BasicBlock *ExitB = CurLoop->getExitBlock();
>     @@ -1011,30 +1659,65 @@ bool PolynomialMultiplyRecognize::recogn
>        Value *CIV = getCountIV(LoopB);
>        ParsedValues PV;
>        PV.IterCount = IterCount;
>     +  DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " <<
>     IterCount << '\n');
>
>     -  // Test function to see if a given select instruction is a part
>     of the
>     -  // pmpy pattern. The argument PreScan set to "true" indicates
>     that only
>     -  // a preliminary scan is needed, "false" indicated an exact match.
>     -  auto CouldBePmpy = [this, LoopB, EntryB, CIV, &PV] (bool PreScan)
>     -      -> std::function<bool (Instruction &I)> {
>     -    return [this, LoopB, EntryB, CIV, &PV, PreScan] (Instruction
>     &I) -> bool {
>     -      if (auto *SelI = dyn_cast<SelectInst>(&I))
>     -        return scanSelect(SelI, LoopB, EntryB, CIV, PV, PreScan);
>     -      return false;
>     -    };
>     -  };
>     -  auto PreF = std::find_if(LoopB->begin(), LoopB->end(),
>     CouldBePmpy(true));
>     -  if (PreF == LoopB->end())
>     +  setupSimplifier();
>     +
>     +  // Perform a preliminary scan of select instructions to see if
>     any of them
>     +  // looks like a generator of the polynomial multiply steps.
>     Assume that a
>     +  // loop can only contain a single transformable operation, so
>     stop the
>     +  // traversal after the first reasonable candidate was found.
>     +  // XXX: Currently this approach can modify the loop before being
>     100% sure
>     +  // that the transformation can be carried out.
>     +  bool FoundPreScan = false;
>     +  for (Instruction &In : *LoopB) {
>     +    SelectInst *SI = dyn_cast<SelectInst>(&In);
>     +    if (!SI)
>     +      continue;
>     +
>     +    Simplifier::Context C(SI);
>     +    Value *T = Simp.simplify(C);
>     +    SelectInst *SelI = (T && isa<SelectInst>(T)) ?
>     cast<SelectInst>(T) : SI;
>     +    DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n');
>     +    if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
>     +      FoundPreScan = true;
>     +      if (SelI != SI) {
>     +        Value *NewSel = C.materialize(LoopB, SI->getIterator());
>     +        SI->replaceAllUsesWith(NewSel);
>     +        RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
>     +      }
>     +      break;
>     +    }
>     +  }
>     +
>     +  if (!FoundPreScan) {
>     +    DEBUG(dbgs() << "Have not found candidates for pmpy\n");
>          return false;
>     +  }
>
>        if (!PV.Left) {
>     +    // The right shift version actually only returns the higher bits of
>     +    // the result (each iteration discards the LSB). If we want to
>     convert it
>     +    // to a left-shifting loop, the working data type must be at
>     least as
>     +    // wide as the target's pmpy instruction.
>     +    if (!promoteTypes(LoopB, ExitB))
>     +      return false;
>          convertShiftsToLeft(LoopB, ExitB, IterCount);
>          cleanupLoopBody(LoopB);
>        }
>
>     -  auto PostF = std::find_if(LoopB->begin(), LoopB->end(),
>     CouldBePmpy(false));
>     -  if (PostF == LoopB->end())
>     -    return false;
>     +  // Scan the loop again, find the generating select instruction.
>     +  bool FoundScan = false;
>     +  for (Instruction &In : *LoopB) {
>     +    SelectInst *SelI = dyn_cast<SelectInst>(&In);
>     +    if (!SelI)
>     +      continue;
>     +    DEBUG(dbgs() << "scanSelect: " << *SelI << '\n');
>     +    FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
>     +    if (FoundScan)
>     +      break;
>     +  }
>     +  assert(FoundScan);
>
>        DEBUG({
>          StringRef PP = (PV.M ? "(P+M)" : "P");
>
>     Added: llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll?rev=298282&view=auto
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll (added)
>     +++ llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll Mon Mar
>     20 13:12:58 2017
>     @@ -0,0 +1,84 @@
>     +; Run -O2 to make sure that all the usual optimizations do happen
>     before
>     +; the Hexagon loop idiom recognition runs. This is to check that we
>     still
>     +; get this opportunity regardless of what happens before.
>     +
>     +; RUN: opt -O2 -march=hexagon -S < %s | FileCheck %s
>     +
>     +target triple = "hexagon"
>     +target datalayout =
>     "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
>     +
>     +; CHECK-LABEL: define zeroext i16 @pmpy_mod_lsr
>     +; There need to be two pmpy instructions.
>     +; CHECK: call i64 @llvm.hexagon.M4.pmpyw
>     +; CHECK: call i64 @llvm.hexagon.M4.pmpyw
>     +
>     +define zeroext i16 @pmpy_mod_lsr(i8 zeroext %a0, i16 zeroext %a1) #0 {
>     +b2:
>     +  br label %b3
>     +
>     +b3:                                               ; preds = %b44, %b2
>     +  %v4 = phi i8 [ %a0, %b2 ], [ %v19, %b44 ]
>     +  %v5 = phi i16 [ %a1, %b2 ], [ %v43, %b44 ]
>     +  %v6 = phi i8 [ 0, %b2 ], [ %v45, %b44 ]
>     +  %v7 = zext i8 %v6 to i32
>     +  %v8 = icmp slt i32 %v7, 8
>     +  br i1 %v8, label %b9, label %b46
>     +
>     +b9:                                               ; preds = %b3
>     +  %v10 = zext i8 %v4 to i32
>     +  %v11 = and i32 %v10, 1
>     +  %v12 = trunc i16 %v5 to i8
>     +  %v13 = zext i8 %v12 to i32
>     +  %v14 = and i32 %v13, 1
>     +  %v15 = xor i32 %v11, %v14
>     +  %v16 = trunc i32 %v15 to i8
>     +  %v17 = zext i8 %v4 to i32
>     +  %v18 = ashr i32 %v17, 1
>     +  %v19 = trunc i32 %v18 to i8
>     +  %v20 = zext i8 %v16 to i32
>     +  %v21 = icmp eq i32 %v20, 1
>     +  br i1 %v21, label %b22, label %b26
>     +
>     +b22:                                              ; preds = %b9
>     +  %v23 = zext i16 %v5 to i32
>     +  %v24 = xor i32 %v23, 16386
>     +  %v25 = trunc i32 %v24 to i16
>     +  br label %b27
>     +
>     +b26:                                              ; preds = %b9
>     +  br label %b27
>     +
>     +b27:                                              ; preds = %b26, %b22
>     +  %v28 = phi i16 [ %v25, %b22 ], [ %v5, %b26 ]
>     +  %v29 = phi i8 [ 1, %b22 ], [ 0, %b26 ]
>     +  %v30 = zext i16 %v28 to i32
>     +  %v31 = ashr i32 %v30, 1
>     +  %v32 = trunc i32 %v31 to i16
>     +  %v33 = icmp ne i8 %v29, 0
>     +  br i1 %v33, label %b34, label %b38
>     +
>     +b34:                                              ; preds = %b27
>     +  %v35 = zext i16 %v32 to i32
>     +  %v36 = or i32 %v35, 32768
>     +  %v37 = trunc i32 %v36 to i16
>     +  br label %b42
>     +
>     +b38:                                              ; preds = %b27
>     +  %v39 = zext i16 %v32 to i32
>     +  %v40 = and i32 %v39, 32767
>     +  %v41 = trunc i32 %v40 to i16
>     +  br label %b42
>     +
>     +b42:                                              ; preds = %b38, %b34
>     +  %v43 = phi i16 [ %v37, %b34 ], [ %v41, %b38 ]
>     +  br label %b44
>     +
>     +b44:                                              ; preds = %b42
>     +  %v45 = add i8 %v6, 1
>     +  br label %b3
>     +
>     +b46:                                              ; preds = %b3
>     +  ret i16 %v5
>     +}
>     +
>     +attributes #0 = { noinline nounwind "target-cpu"="hexagonv5"
>     "target-features"="-hvx,-hvx-double,-long-calls" }
>
>
>     _______________________________________________
>     llvm-commits mailing list
>     llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, 
hosted by The Linux Foundation


More information about the llvm-commits mailing list