<div dir="ltr">Hello Krzysztof,<br><br>This commit added a warning to one of our builders:<br><br>llvm.src/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp:1083:8: warning: variable ‘IsVolatile’ set but not used [-Wunused-but-set-variable]<br><br><a href="http://lab.llvm.org:8011/builders/clang-3stage-ubuntu">http://lab.llvm.org:8011/builders/clang-3stage-ubuntu</a><br><br>Please have a look at this.<br><br>Thanks<br><br>Galina<br><br></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jan 26, 2017 at 1:41 PM, Krzysztof Parzyszek via llvm-commits <span dir="ltr">&lt;<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: kparzysz<br>
Date: Thu Jan 26 15:41:10 2017<br>
New Revision: 293213<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=293213&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=293213&view=rev</a><br>
Log:<br>
[Hexagon] Add Hexagon-specific loop idiom recognition pass<br>
<br>
Added:<br>
llvm/trunk/lib/Target/Hexagon/<wbr>HexagonLoopIdiomRecognition.<wbr>cpp<br>
llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/<br>
llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/hexagon-<wbr>memmove1.ll<br>
llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/hexagon-<wbr>memmove2.ll<br>
llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/lcssa.ll<br>
llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/nullptr-<wbr>crash.ll<br>
llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/pmpy.ll<br>
Modified:<br>
llvm/trunk/lib/Target/Hexagon/<wbr>CMakeLists.txt<br>
llvm/trunk/lib/Target/Hexagon/<wbr>HexagonTargetMachine.cpp<br>
llvm/trunk/lib/Target/Hexagon/<wbr>HexagonTargetMachine.h<br>
<br>
Modified: llvm/trunk/lib/Target/Hexagon/<wbr>CMakeLists.txt<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/CMakeLists.txt?rev=293213&r1=293212&r2=293213&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>Hexagon/CMakeLists.txt?rev=<wbr>293213&r1=293212&r2=293213&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/Hexagon/<wbr>CMakeLists.txt (original)<br>
+++ llvm/trunk/lib/Target/Hexagon/<wbr>CMakeLists.txt Thu Jan 26 15:41:10 2017<br>
@@ -35,6 +35,7 @@ add_llvm_target(HexagonCodeGen<br>
HexagonInstrInfo.cpp<br>
HexagonISelDAGToDAG.cpp<br>
HexagonISelLowering.cpp<br>
+ HexagonLoopIdiomRecognition.<wbr>cpp<br>
HexagonMachineFunctionInfo.cpp<br>
HexagonMachineScheduler.cpp<br>
HexagonMCInstLower.cpp<br>
<br>
Added: llvm/trunk/lib/Target/Hexagon/<wbr>HexagonLoopIdiomRecognition.<wbr>cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp?rev=293213&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>Hexagon/<wbr>HexagonLoopIdiomRecognition.<wbr>cpp?rev=293213&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/Hexagon/<wbr>HexagonLoopIdiomRecognition.<wbr>cpp (added)<br>
+++ llvm/trunk/lib/Target/Hexagon/<wbr>HexagonLoopIdiomRecognition.<wbr>cpp Thu Jan 26 15:41:10 2017<br>
@@ -0,0 +1,1618 @@<br>
+//===--- HexagonLoopIdiomRecognition.<wbr>cpp ------------------------------<wbr>----===//<br>
+//<br>
+// The LLVM Compiler Infrastructure<br>
+//<br>
+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>
+//<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+<br>
+#define DEBUG_TYPE "hexagon-lir"<br>
+<br>
+#include "llvm/ADT/SetVector.h"<br>
+#include "llvm/ADT/SmallSet.h"<br>
+#include "llvm/Analysis/AliasAnalysis.<wbr>h"<br>
+#include "llvm/Analysis/<wbr>InstructionSimplify.h"<br>
+#include "llvm/Analysis/LoopPass.h"<br>
+#include "llvm/Analysis/<wbr>ScalarEvolution.h"<br>
+#include "llvm/Analysis/<wbr>ScalarEvolutionExpander.h"<br>
+#include "llvm/Analysis/<wbr>ScalarEvolutionExpressions.h"<br>
+#include "llvm/Analysis/<wbr>TargetLibraryInfo.h"<br>
+#include "llvm/Analysis/ValueTracking.<wbr>h"<br>
+#include "llvm/IR/DataLayout.h"<br>
+#include "llvm/IR/Dominators.h"<br>
+#include "llvm/IR/IRBuilder.h"<br>
+#include "llvm/IR/PatternMatch.h"<br>
+#include "llvm/Transforms/Scalar.h"<br>
+#include "llvm/Transforms/Utils/Local.<wbr>h"<br>
+#include "llvm/Support/Debug.h"<br>
+#include "llvm/Support/raw_ostream.h"<br>
+<br>
+#include <algorithm><br>
+#include <array><br>
+<br>
+using namespace llvm;<br>
+<br>
+static cl::opt<bool> DisableMemcpyIdiom("disable-<wbr>memcpy-idiom",<br>
+ cl::Hidden, cl::init(false),<br>
+ cl::desc("Disable generation of memcpy in loop idiom recognition"));<br>
+<br>
+static cl::opt<bool> DisableMemmoveIdiom("disable-<wbr>memmove-idiom",<br>
+ cl::Hidden, cl::init(false),<br>
+ cl::desc("Disable generation of memmove in loop idiom recognition"));<br>
+<br>
+static cl::opt<unsigned> RuntimeMemSizeThreshold("<wbr>runtime-mem-idiom-threshold",<br>
+ cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "<br>
+ "check guarding the memmove."));<br>
+<br>
+static cl::opt<unsigned> CompileTimeMemSizeThreshold(<br>
+ "compile-time-mem-idiom-<wbr>threshold", cl::Hidden, cl::init(64),<br>
+ cl::desc("Threshold (in bytes) to perform the transformation, if the "<br>
+ "runtime loop count (mem transfer size) is known at compile-time."));<br>
+<br>
+static cl::opt<bool> OnlyNonNestedMemmove("only-<wbr>nonnested-memmove-idiom",<br>
+ cl::Hidden, cl::init(true),<br>
+ cl::desc("Only enable generating memmove in non-nested loops"));<br>
+<br>
+cl::opt<bool> HexagonVolatileMemcpy("<wbr>disable-hexagon-volatile-<wbr>memcpy",<br>
+ cl::Hidden, cl::init(false),<br>
+ cl::desc("Enable Hexagon-specific memcpy for volatile destination."));<br>
+<br>
+static const char *HexagonVolatileMemcpyName<br>
+ = "hexagon_memcpy_forward_<wbr>vp4cp4n2";<br>
+<br>
+<br>
+namespace llvm {<br>
+ void initializeHexagonLoopIdiomReco<wbr>gnizePass(PassRegistry&);<br>
+ Pass *createHexagonLoopIdiomPass();<br>
+}<br>
+<br>
+namespace {<br>
+ class HexagonLoopIdiomRecognize : public LoopPass {<br>
+ public:<br>
+ static char ID;<br>
+ explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {<br>
+ initializeHexagonLoopIdiomReco<wbr>gnizePass(*PassRegistry::<wbr>getPassRegistry());<br>
+ }<br>
+ StringRef getPassName() const override {<br>
+ return "Recognize Hexagon-specific loop idioms";<br>
+ }<br>
+<br>
+ void getAnalysisUsage(AnalysisUsage &AU) const override {<br>
+ AU.addRequired<<wbr>LoopInfoWrapperPass>();<br>
+ AU.addRequiredID(<wbr>LoopSimplifyID);<br>
+ AU.addRequiredID(LCSSAID);<br>
+ AU.addRequired<<wbr>AAResultsWrapperPass>();<br>
+ AU.addPreserved<<wbr>AAResultsWrapperPass>();<br>
+ AU.addRequired<<wbr>ScalarEvolutionWrapperPass>();<br>
+ AU.addRequired<<wbr>DominatorTreeWrapperPass>();<br>
+ AU.addRequired<<wbr>TargetLibraryInfoWrapperPass>(<wbr>);<br>
+ AU.addPreserved<<wbr>TargetLibraryInfoWrapperPass>(<wbr>);<br>
+ }<br>
+<br>
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;<br>
+<br>
+ private:<br>
+ unsigned getStoreSizeInBytes(StoreInst *SI);<br>
+ int getSCEVStride(const SCEVAddRecExpr *StoreEv);<br>
+ bool isLegalStore(Loop *CurLoop, StoreInst *SI);<br>
+ void collectStores(Loop *CurLoop, BasicBlock *BB,<br>
+ SmallVectorImpl<StoreInst*> &Stores);<br>
+ bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);<br>
+ bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;<br>
+ bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,<br>
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);<br>
+ bool runOnCountableLoop(Loop *L);<br>
+<br>
+ AliasAnalysis *AA;<br>
+ const DataLayout *DL;<br>
+ DominatorTree *DT;<br>
+ LoopInfo *LF;<br>
+ const TargetLibraryInfo *TLI;<br>
+ ScalarEvolution *SE;<br>
+ bool HasMemcpy, HasMemmove;<br>
+ };<br>
+}<br>
+<br>
+char HexagonLoopIdiomRecognize::ID = 0;<br>
+<br>
+INITIALIZE_PASS_BEGIN(<wbr>HexagonLoopIdiomRecognize, "hexagon-loop-idiom",<br>
+ "Recognize Hexagon-specific loop idioms", false, false)<br>
+INITIALIZE_PASS_DEPENDENCY(<wbr>LoopInfoWrapperPass)<br>
+INITIALIZE_PASS_DEPENDENCY(<wbr>LoopSimplify)<br>
+INITIALIZE_PASS_DEPENDENCY(<wbr>LCSSAWrapperPass)<br>
+INITIALIZE_PASS_DEPENDENCY(<wbr>ScalarEvolutionWrapperPass)<br>
+INITIALIZE_PASS_DEPENDENCY(<wbr>DominatorTreeWrapperPass)<br>
+INITIALIZE_PASS_DEPENDENCY(<wbr>TargetLibraryInfoWrapperPass)<br>
+INITIALIZE_PASS_DEPENDENCY(<wbr>AAResultsWrapperPass)<br>
+INITIALIZE_PASS_END(<wbr>HexagonLoopIdiomRecognize, "hexagon-loop-idiom",<br>
+ "Recognize Hexagon-specific loop idioms", false, false)<br>
+<br>
+<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+//<br>
+// Implementation of PolynomialMultiplyRecognize<br>
+//<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+<br>
+namespace {<br>
+ class PolynomialMultiplyRecognize {<br>
+ public:<br>
+ explicit PolynomialMultiplyRecognize(<wbr>Loop *loop, const DataLayout &dl,<br>
+ const DominatorTree &dt, const TargetLibraryInfo &tli,<br>
+ ScalarEvolution &se)<br>
+ : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}<br>
+<br>
+ bool recognize();<br>
+ private:<br>
+ typedef SetVector<Value*> ValueSeq;<br>
+<br>
+ Value *getCountIV(BasicBlock *BB);<br>
+ bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);<br>
+ void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,<br>
+ ValueSeq &Late);<br>
+ bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);<br>
+ bool commutesWithShift(Instruction *I);<br>
+ bool highBitsAreZero(Value *V, unsigned IterCount);<br>
+ bool keepsHighBitsZero(Value *V, unsigned IterCount);<br>
+ bool isOperandShifted(Instruction *I, Value *Op);<br>
+ bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,<br>
+ unsigned IterCount);<br>
+ void cleanupLoopBody(BasicBlock *LoopB);<br>
+<br>
+ struct ParsedValues {<br>
+ ParsedValues() : M(nullptr), P(nullptr), Q(nullptr), R(nullptr),<br>
+ X(nullptr), Res(nullptr), IterCount(0), Left(false), Inv(false) {}<br>
+ Value *M, *P, *Q, *R, *X;<br>
+ Instruction *Res;<br>
+ unsigned IterCount;<br>
+ bool Left, Inv;<br>
+ };<br>
+<br>
+ bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);<br>
+ bool matchRightShift(SelectInst *SelI, ParsedValues &PV);<br>
+ bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,<br>
+ Value *CIV, ParsedValues &PV, bool PreScan);<br>
+ unsigned getInverseMxN(unsigned QP);<br>
+ Value *generate(BasicBlock::iterator At, ParsedValues &PV);<br>
+<br>
+ Loop *CurLoop;<br>
+ const DataLayout &DL;<br>
+ const DominatorTree &DT;<br>
+ const TargetLibraryInfo &TLI;<br>
+ ScalarEvolution &SE;<br>
+ };<br>
+}<br>
+<br>
+<br>
+Value *PolynomialMultiplyRecognize::<wbr>getCountIV(BasicBlock *BB) {<br>
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);<br>
+ if (std::distance(PI, PE) != 2)<br>
+ return nullptr;<br>
+ BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;<br>
+<br>
+ for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {<br>
+ auto *PN = cast<PHINode>(I);<br>
+ Value *InitV = PN->getIncomingValueForBlock(<wbr>PB);<br>
+ if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)-><wbr>isZero())<br>
+ continue;<br>
+ Value *IterV = PN->getIncomingValueForBlock(<wbr>BB);<br>
+ if (!isa<BinaryOperator>(IterV))<br>
+ continue;<br>
+ auto *BO = dyn_cast<BinaryOperator>(<wbr>IterV);<br>
+ if (BO->getOpcode() != Instruction::Add)<br>
+ continue;<br>
+ Value *IncV = nullptr;<br>
+ if (BO->getOperand(0) == PN)<br>
+ IncV = BO->getOperand(1);<br>
+ else if (BO->getOperand(1) == PN)<br>
+ IncV = BO->getOperand(0);<br>
+ if (IncV == nullptr)<br>
+ continue;<br>
+<br>
+ if (auto *T = dyn_cast<ConstantInt>(IncV))<br>
+ if (T->getZExtValue() == 1)<br>
+ return PN;<br>
+ }<br>
+ return nullptr;<br>
+}<br>
+<br>
+<br>
+static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {<br>
+ for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {<br>
+ Use &TheUse = UI.getUse();<br>
+ ++UI;<br>
+ if (auto *II = dyn_cast<Instruction>(TheUse.<wbr>getUser()))<br>
+ if (BB == II->getParent())<br>
+ II->replaceUsesOfWith(I, J);<br>
+ }<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>matchLeftShift(SelectInst *SelI,<br>
+ Value *CIV, ParsedValues &PV) {<br>
+ // Match the following:<br>
+ // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R<br>
+ // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)<br>
+ // The condition may also check for equality with the masked value, i.e<br>
+ // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R<br>
+ // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);<br>
+<br>
+ Value *CondV = SelI->getCondition();<br>
+ Value *TrueV = SelI->getTrueValue();<br>
+ Value *FalseV = SelI->getFalseValue();<br>
+<br>
+ using namespace PatternMatch;<br>
+<br>
+ CmpInst::Predicate P;<br>
+ Value *A = nullptr, *B = nullptr, *C = nullptr;<br>
+<br>
+ if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&<br>
+ !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))<br>
+ return false;<br>
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)<br>
+ return false;<br>
+ // Matched: select (A & B) == C ? ... : ...<br>
+ // select (A & B) != C ? ... : ...<br>
+<br>
+ Value *X = nullptr, *Sh1 = nullptr;<br>
+ // Check (A & B) for (X & (1 << i)):<br>
+ if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {<br>
+ Sh1 = A;<br>
+ X = B;<br>
+ } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {<br>
+ Sh1 = B;<br>
+ X = A;<br>
+ } else {<br>
+ // TODO: Could also check for an induction variable containing single<br>
+ // bit shifted left by 1 in each iteration.<br>
+ return false;<br>
+ }<br>
+<br>
+ bool TrueIfZero;<br>
+<br>
+ // Check C against the possible values for comparison: 0 and (1 << i):<br>
+ if (match(C, m_Zero()))<br>
+ TrueIfZero = (P == CmpInst::ICMP_EQ);<br>
+ else if (C == Sh1)<br>
+ TrueIfZero = (P == CmpInst::ICMP_NE);<br>
+ else<br>
+ return false;<br>
+<br>
+ // So far, matched:<br>
+ // select (X & (1 << i)) ? ... : ...<br>
+ // including variations of the check against zero/non-zero value.<br>
+<br>
+ Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;<br>
+ if (TrueIfZero) {<br>
+ ShouldSameV = TrueV;<br>
+ ShouldXoredV = FalseV;<br>
+ } else {<br>
+ ShouldSameV = FalseV;<br>
+ ShouldXoredV = TrueV;<br>
+ }<br>
+<br>
+ Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;<br>
+ Value *T = nullptr;<br>
+ if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {<br>
+ // Matched: select +++ ? ... : Y ^ Z<br>
+ // select +++ ? Y ^ Z : ...<br>
+ // where +++ denotes previously checked matches.<br>
+ if (ShouldSameV == Y)<br>
+ T = Z;<br>
+ else if (ShouldSameV == Z)<br>
+ T = Y;<br>
+ else<br>
+ return false;<br>
+ R = ShouldSameV;<br>
+ // Matched: select +++ ? R : R ^ T<br>
+ // select +++ ? R ^ T : R<br>
+ // depending on TrueIfZero.<br>
+<br>
+ } else if (match(ShouldSameV, m_Zero())) {<br>
+ // Matched: select +++ ? 0 : ...<br>
+ // select +++ ? ... : 0<br>
+ if (!SelI->hasOneUse())<br>
+ return false;<br>
+ T = ShouldXoredV;<br>
+ // Matched: select +++ ? 0 : T<br>
+ // select +++ ? T : 0<br>
+<br>
+ Value *U = *SelI->user_begin();<br>
+ if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&<br>
+ !match(U, m_Xor(m_Value(R), m_Specific(SelI))))<br>
+ return false;<br>
+ // Matched: xor (select +++ ? 0 : T), R<br>
+ // xor (select +++ ? T : 0), R<br>
+ } else<br>
+ return false;<br>
+<br>
+ // The xor input value T is isolated into its own match so that it could<br>
+ // be checked against an induction variable containing a shifted bit<br>
+ // (todo).<br>
+ // For now, check against (Q << i).<br>
+ if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&<br>
+ !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))<br>
+ return false;<br>
+ // Matched: select +++ ? R : R ^ (Q << i)<br>
+ // select +++ ? R ^ (Q << i) : R<br>
+<br>
+ PV.X = X;<br>
+ PV.Q = Q;<br>
+ PV.R = R;<br>
+ PV.Left = true;<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>matchRightShift(SelectInst *SelI,<br>
+ ParsedValues &PV) {<br>
+ // Match the following:<br>
+ // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)<br>
+ // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q<br>
+ // The condition may also check for equality with the masked value, i.e<br>
+ // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)<br>
+ // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q<br>
+<br>
+ Value *CondV = SelI->getCondition();<br>
+ Value *TrueV = SelI->getTrueValue();<br>
+ Value *FalseV = SelI->getFalseValue();<br>
+<br>
+ using namespace PatternMatch;<br>
+<br>
+ Value *C = nullptr;<br>
+ CmpInst::Predicate P;<br>
+ bool TrueIfZero;<br>
+<br>
+ if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||<br>
+ match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {<br>
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)<br>
+ return false;<br>
+ // Matched: select C == 0 ? ... : ...<br>
+ // select C != 0 ? ... : ...<br>
+ TrueIfZero = (P == CmpInst::ICMP_EQ);<br>
+ } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||<br>
+ match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {<br>
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)<br>
+ return false;<br>
+ // Matched: select C == 1 ? ... : ...<br>
+ // select C != 1 ? ... : ...<br>
+ TrueIfZero = (P == CmpInst::ICMP_NE);<br>
+ } else<br>
+ return false;<br>
+<br>
+ Value *X = nullptr;<br>
+ if (!match(C, m_And(m_Value(X), m_One())) &&<br>
+ !match(C, m_And(m_One(), m_Value(X))))<br>
+ return false;<br>
+ // Matched: select (X & 1) == +++ ? ... : ...<br>
+ // select (X & 1) != +++ ? ... : ...<br>
+<br>
+ Value *R = nullptr, *Q = nullptr;<br>
+ if (TrueIfZero) {<br>
+ // The select's condition is true if the tested bit is 0.<br>
+ // TrueV must be the shift, FalseV must be the xor.<br>
+ if (!match(TrueV, m_LShr(m_Value(R), m_One())))<br>
+ return false;<br>
+ // Matched: select +++ ? (R >> 1) : ...<br>
+ if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&<br>
+ !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))<br>
+ return false;<br>
+ // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q<br>
+ // with commuting ^.<br>
+ } else {<br>
+ // The select's condition is true if the tested bit is 1.<br>
+ // TrueV must be the xor, FalseV must be the shift.<br>
+ if (!match(FalseV, m_LShr(m_Value(R), m_One())))<br>
+ return false;<br>
+ // Matched: select +++ ? ... : (R >> 1)<br>
+ if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&<br>
+ !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))<br>
+ return false;<br>
+ // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)<br>
+ // with commuting ^.<br>
+ }<br>
+<br>
+ PV.X = X;<br>
+ PV.Q = Q;<br>
+ PV.R = R;<br>
+ PV.Left = false;<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>scanSelect(SelectInst *SelI,<br>
+ BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,<br>
+ bool PreScan) {<br>
+ using namespace PatternMatch;<br>
+<br>
+ // The basic pattern for R = P.Q is:<br>
+ // for i = 0..31<br>
+ // R = phi (0, R')<br>
+ // if (P & (1 << i)) ; test-bit(P, i)<br>
+ // R' = R ^ (Q << i)<br>
+ //<br>
+ // Similarly, the basic pattern for R = (P/Q).Q - P<br>
+ // for i = 0..31<br>
+ // R = phi(P, R')<br>
+ // if (R & (1 << i))<br>
+ // R' = R ^ (Q << i)<br>
+<br>
+ // There exist idioms, where instead of Q being shifted left, P is shifted<br>
+ // right. This produces a result that is shifted right by 32 bits (the<br>
+ // non-shifted result is 64-bit).<br>
+ //<br>
+ // For R = P.Q, this would be:<br>
+ // for i = 0..31<br>
+ // R = phi (0, R')<br>
+ // if ((P >> i) & 1)<br>
+ // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must<br>
+ // else ; be shifted by 1, not i.<br>
+ // R' = R >> 1<br>
+ //<br>
+ // And for the inverse:<br>
+ // for i = 0..31<br>
+ // R = phi (P, R')<br>
+ // if (R & 1)<br>
+ // R' = (R >> 1) ^ Q<br>
+ // else<br>
+ // R' = R >> 1<br>
+<br>
+ // The left-shifting idioms share the same pattern:<br>
+ // select (X & (1 << i)) ? R ^ (Q << i) : R<br>
+ // Similarly for right-shifting idioms:<br>
+ // select (X & 1) ? (R >> 1) ^ Q<br>
+<br>
+ if (matchLeftShift(SelI, CIV, PV)) {<br>
+ // If this is a pre-scan, getting this far is sufficient.<br>
+ if (PreScan)<br>
+ return true;<br>
+<br>
+ // Need to make sure that the SelI goes back into R.<br>
+ auto *RPhi = dyn_cast<PHINode>(PV.R);<br>
+ if (!RPhi)<br>
+ return false;<br>
+ if (SelI != RPhi-><wbr>getIncomingValueForBlock(<wbr>LoopB))<br>
+ return false;<br>
+ PV.Res = SelI;<br>
+<br>
+ // If X is loop invariant, it must be the input polynomial, and the<br>
+ // idiom is the basic polynomial multiply.<br>
+ if (CurLoop->isLoopInvariant(PV.<wbr>X)) {<br>
+ PV.P = PV.X;<br>
+ PV.Inv = false;<br>
+ } else {<br>
+ // X is not loop invariant. If X == R, this is the inverse pmpy.<br>
+ // Otherwise, check for an xor with an invariant value. If the<br>
+ // variable argument to the xor is R, then this is still a valid<br>
+ // inverse pmpy.<br>
+ PV.Inv = true;<br>
+ if (PV.X != PV.R) {<br>
+ Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;<br>
+ if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))<br>
+ return false;<br>
+ auto *I1 = dyn_cast<Instruction>(X1);<br>
+ auto *I2 = dyn_cast<Instruction>(X2);<br>
+ if (!I1 || I1->getParent() != LoopB) {<br>
+ Var = X2;<br>
+ Inv = X1;<br>
+ } else if (!I2 || I2->getParent() != LoopB) {<br>
+ Var = X1;<br>
+ Inv = X2;<br>
+ } else<br>
+ return false;<br>
+ if (Var != PV.R)<br>
+ return false;<br>
+ PV.M = Inv;<br>
+ }<br>
+ // The input polynomial P still needs to be determined. It will be<br>
+ // the entry value of R.<br>
+ Value *EntryP = RPhi-><wbr>getIncomingValueForBlock(<wbr>PrehB);<br>
+ PV.P = EntryP;<br>
+ }<br>
+<br>
+ return true;<br>
+ }<br>
+<br>
+ if (matchRightShift(SelI, PV)) {<br>
+ // If this is an inverse pattern, the Q polynomial must be known at<br>
+ // compile time.<br>
+ if (PV.Inv && !isa<ConstantInt>(PV.Q))<br>
+ return false;<br>
+ if (PreScan)<br>
+ return true;<br>
+ // There is no exact matching of right-shift pmpy.<br>
+ return false;<br>
+ }<br>
+<br>
+ return false;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>findCycle(Value *Out, Value *In,<br>
+ ValueSeq &Cycle) {<br>
+ // Out = ..., In, ...<br>
+ if (Out == In)<br>
+ return true;<br>
+<br>
+ auto *BB = cast<Instruction>(Out)-><wbr>getParent();<br>
+ bool HadPhi = false;<br>
+<br>
+ for (auto U : Out->users()) {<br>
+ auto *I = dyn_cast<Instruction>(&*U);<br>
+ if (I == nullptr || I->getParent() != BB)<br>
+ continue;<br>
+ // Make sure that there are no multi-iteration cycles, e.g.<br>
+ // p1 = phi(p2)<br>
+ // p2 = phi(p1)<br>
+ // The cycle p1->p2->p1 would span two loop iterations.<br>
+ // Check that there is only one phi in the cycle.<br>
+ bool IsPhi = isa<PHINode>(I);<br>
+ if (IsPhi && HadPhi)<br>
+ return false;<br>
+ HadPhi |= IsPhi;<br>
+ if (Cycle.count(I))<br>
+ return false;<br>
+ Cycle.insert(I);<br>
+ if (findCycle(I, In, Cycle))<br>
+ break;<br>
+ Cycle.remove(I);<br>
+ }<br>
+ return !Cycle.empty();<br>
+}<br>
+<br>
+<br>
+void PolynomialMultiplyRecognize::<wbr>classifyCycle(Instruction *DivI,<br>
+ ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {<br>
+ // All the values in the cycle that are between the phi node and the<br>
+ // divider instruction will be classified as "early", all other values<br>
+ // will be "late".<br>
+<br>
+ bool IsE = true;<br>
+ unsigned I, N = Cycle.size();<br>
+ for (I = 0; I < N; ++I) {<br>
+ Value *V = Cycle[I];<br>
+ if (DivI == V)<br>
+ IsE = false;<br>
+ else if (!isa<PHINode>(V))<br>
+ continue;<br>
+ // Stop if found either.<br>
+ break;<br>
+ }<br>
+ // "I" is the index of either DivI or the phi node, whichever was first.<br>
+ // "E" is "false" or "true" respectively.<br>
+ ValueSeq &First = !IsE ? Early : Late;<br>
+ for (unsigned J = 0; J < I; ++J)<br>
+ First.insert(Cycle[J]);<br>
+<br>
+ ValueSeq &Second = IsE ? Early : Late;<br>
+ Second.insert(Cycle[I]);<br>
+ for (++I; I < N; ++I) {<br>
+ Value *V = Cycle[I];<br>
+ if (DivI == V || isa<PHINode>(V))<br>
+ break;<br>
+ Second.insert(V);<br>
+ }<br>
+<br>
+ for (; I < N; ++I)<br>
+ First.insert(Cycle[I]);<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>classifyInst(Instruction *UseI,<br>
+ ValueSeq &Early, ValueSeq &Late) {<br>
+ // Select is an exception, since the condition value does not have to be<br>
+ // classified in the same way as the true/false values. The true/false<br>
+ // values do have to be both early or both late.<br>
+ if (UseI->getOpcode() == Instruction::Select) {<br>
+ Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);<br>
+ if (Early.count(TV) || Early.count(FV)) {<br>
+ if (Late.count(TV) || Late.count(FV))<br>
+ return false;<br>
+ Early.insert(UseI);<br>
+ } else if (Late.count(TV) || Late.count(FV)) {<br>
+ if (Early.count(TV) || Early.count(FV))<br>
+ return false;<br>
+ Late.insert(UseI);<br>
+ }<br>
+ return true;<br>
+ }<br>
+<br>
+ // Not sure what would be the example of this, but the code below relies<br>
+ // on having at least one operand.<br>
+ if (UseI->getNumOperands() == 0)<br>
+ return true;<br>
+<br>
+ bool AE = true, AL = true;<br>
+ for (auto &I : UseI->operands()) {<br>
+ if (Early.count(&*I))<br>
+ AL = false;<br>
+ else if (Late.count(&*I))<br>
+ AE = false;<br>
+ }<br>
+ // If the operands appear "all early" and "all late" at the same time,<br>
+ // then it means that none of them are actually classified as either.<br>
+ // This is harmless.<br>
+ if (AE && AL)<br>
+ return true;<br>
+ // Conversely, if they are neither "all early" nor "all late", then<br>
+ // we have a mixture of early and late operands that is not a known<br>
+ // exception.<br>
+ if (!AE && !AL)<br>
+ return false;<br>
+<br>
+ // Check that we have covered the two special cases.<br>
+ assert(AE != AL);<br>
+<br>
+ if (AE)<br>
+ Early.insert(UseI);<br>
+ else<br>
+ Late.insert(UseI);<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>commutesWithShift(Instruction *I) {<br>
+ switch (I->getOpcode()) {<br>
+ case Instruction::And:<br>
+ case Instruction::Or:<br>
+ case Instruction::Xor:<br>
+ case Instruction::LShr:<br>
+ case Instruction::Shl:<br>
+ case Instruction::Select:<br>
+ case Instruction::ICmp:<br>
+ case Instruction::PHI:<br>
+ break;<br>
+ default:<br>
+ return false;<br>
+ }<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>highBitsAreZero(Value *V,<br>
+ unsigned IterCount) {<br>
+ auto *T = dyn_cast<IntegerType>(V-><wbr>getType());<br>
+ if (!T)<br>
+ return false;<br>
+<br>
+ unsigned BW = T->getBitWidth();<br>
+ APInt K0(BW, 0), K1(BW, 0);<br>
+ computeKnownBits(V, K0, K1, DL);<br>
+ return K0.countLeadingOnes() >= IterCount;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>keepsHighBitsZero(Value *V,<br>
+ unsigned IterCount) {<br>
+ // Assume that all inputs to the value have the high bits zero.<br>
+ // Check if the value itself preserves the zeros in the high bits.<br>
+ if (auto *C = dyn_cast<ConstantInt>(V))<br>
+ return C->getValue().<wbr>countLeadingZeros() >= IterCount;<br>
+<br>
+ if (auto *I = dyn_cast<Instruction>(V)) {<br>
+ switch (I->getOpcode()) {<br>
+ case Instruction::And:<br>
+ case Instruction::Or:<br>
+ case Instruction::Xor:<br>
+ case Instruction::LShr:<br>
+ case Instruction::Select:<br>
+ case Instruction::ICmp:<br>
+ case Instruction::PHI:<br>
+ return true;<br>
+ }<br>
+ }<br>
+<br>
+ return false;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>isOperandShifted(Instruction *I, Value *Op) {<br>
+ unsigned Opc = I->getOpcode();<br>
+ if (Opc == Instruction::Shl || Opc == Instruction::LShr)<br>
+ return Op != I->getOperand(1);<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>convertShiftsToLeft(BasicBlock *LoopB,<br>
+ BasicBlock *ExitB, unsigned IterCount) {<br>
+ Value *CIV = getCountIV(LoopB);<br>
+ if (CIV == nullptr)<br>
+ return false;<br>
+ auto *CIVTy = dyn_cast<IntegerType>(CIV-><wbr>getType());<br>
+ if (CIVTy == nullptr)<br>
+ return false;<br>
+<br>
+ ValueSeq RShifts;<br>
+ ValueSeq Early, Late, Cycled;<br>
+<br>
+ // Find all value cycles that contain logical right shifts by 1.<br>
+ for (Instruction &I : *LoopB) {<br>
+ using namespace PatternMatch;<br>
+ Value *V = nullptr;<br>
+ if (!match(&I, m_LShr(m_Value(V), m_One())))<br>
+ continue;<br>
+ ValueSeq C;<br>
+ if (!findCycle(&I, V, C))<br>
+ continue;<br>
+<br>
+ // Found a cycle.<br>
+ C.insert(&I);<br>
+ classifyCycle(&I, C, Early, Late);<br>
+ Cycled.insert(C.begin(), C.end());<br>
+ RShifts.insert(&I);<br>
+ }<br>
+<br>
+ // Find the set of all values affected by the shift cycles, i.e. all<br>
+ // cycled values, and (recursively) all their users.<br>
+ ValueSeq Users(Cycled.begin(), Cycled.end());<br>
+ for (unsigned i = 0; i < Users.size(); ++i) {<br>
+ Value *V = Users[i];<br>
+ if (!isa<IntegerType>(V->getType(<wbr>)))<br>
+ return false;<br>
+ auto *R = cast<Instruction>(V);<br>
+ // If the instruction does not commute with shifts, the loop cannot<br>
+ // be unshifted.<br>
+ if (!commutesWithShift(R))<br>
+ return false;<br>
+ for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {<br>
+ auto *T = cast<Instruction>(*I);<br>
+ // Skip users from outside of the loop. They will be handled later.<br>
+ // Also, skip the right-shifts and phi nodes, since they mix early<br>
+ // and late values.<br>
+ if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))<br>
+ continue;<br>
+<br>
+ Users.insert(T);<br>
+ if (!classifyInst(T, Early, Late))<br>
+ return false;<br>
+ }<br>
+ }<br>
+<br>
+ if (Users.size() == 0)<br>
+ return false;<br>
+<br>
+ // Verify that high bits remain zero.<br>
+ ValueSeq Internal(Users.begin(), Users.end());<br>
+ ValueSeq Inputs;<br>
+ for (unsigned i = 0; i < Internal.size(); ++i) {<br>
+ auto *R = dyn_cast<Instruction>(<wbr>Internal[i]);<br>
+ if (!R)<br>
+ continue;<br>
+ for (Value *Op : R->operands()) {<br>
+ auto *T = dyn_cast<Instruction>(Op);<br>
+ if (T && T->getParent() != LoopB)<br>
+ Inputs.insert(Op);<br>
+ else<br>
+ Internal.insert(Op);<br>
+ }<br>
+ }<br>
+ for (Value *V : Inputs)<br>
+ if (!highBitsAreZero(V, IterCount))<br>
+ return false;<br>
+ for (Value *V : Internal)<br>
+ if (!keepsHighBitsZero(V, IterCount))<br>
+ return false;<br>
+<br>
+ // Finally, the work can be done. Unshift each user.<br>
+ IRBuilder<> IRB(LoopB);<br>
+ std::map<Value*,Value*> ShiftMap;<br>
+ typedef std::map<std::pair<Value*,<wbr>Type*>,Value*> CastMapType;<br>
+ CastMapType CastMap;<br>
+<br>
+ auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,<br>
+ IntegerType *Ty) -> Value* {<br>
+ auto H = CM.find(std::make_pair(V, Ty));<br>
+ if (H != CM.end())<br>
+ return H->second;<br>
+ Value *CV = IRB.CreateIntCast(V, Ty, false);<br>
+ CM.insert(std::make_pair(std::<wbr>make_pair(V, Ty), CV));<br>
+ return CV;<br>
+ };<br>
+<br>
+ for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {<br>
+ if (isa<PHINode>(I) || !Users.count(&*I))<br>
+ continue;<br>
+ using namespace PatternMatch;<br>
+ // Match lshr x, 1.<br>
+ Value *V = nullptr;<br>
+ if (match(&*I, m_LShr(m_Value(V), m_One()))) {<br>
+ replaceAllUsesOfWithIn(&*I, V, LoopB);<br>
+ continue;<br>
+ }<br>
+ // For each non-cycled operand, replace it with the corresponding<br>
+ // value shifted left.<br>
+ for (auto &J : I->operands()) {<br>
+ Value *Op = J.get();<br>
+ if (!isOperandShifted(&*I, Op))<br>
+ continue;<br>
+ if (Users.count(Op))<br>
+ continue;<br>
+ // Skip shifting zeros.<br>
+ if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero(<wbr>))<br>
+ continue;<br>
+ // Check if we have already generated a shift for this value.<br>
+ auto F = ShiftMap.find(Op);<br>
+ Value *W = (F != ShiftMap.end()) ? F->second : nullptr;<br>
+ if (W == nullptr) {<br>
+ IRB.SetInsertPoint(&*I);<br>
+ // First, the shift amount will be CIV or CIV+1, depending on<br>
+ // whether the value is early or late. Instead of creating CIV+1,<br>
+ // do a single shift of the value.<br>
+ Value *ShAmt = CIV, *ShVal = Op;<br>
+ auto *VTy = cast<IntegerType>(ShVal-><wbr>getType());<br>
+ auto *ATy = cast<IntegerType>(ShAmt-><wbr>getType());<br>
+ if (Late.count(&*I))<br>
+ ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));<br>
+ // Second, the types of the shifted value and the shift amount<br>
+ // must match.<br>
+ if (VTy != ATy) {<br>
+ if (VTy->getBitWidth() < ATy->getBitWidth())<br>
+ ShVal = upcast(CastMap, IRB, ShVal, ATy);<br>
+ else<br>
+ ShAmt = upcast(CastMap, IRB, ShAmt, VTy);<br>
+ }<br>
+ // Ready to generate the shift and memoize it.<br>
+ W = IRB.CreateShl(ShVal, ShAmt);<br>
+ ShiftMap.insert(std::make_<wbr>pair(Op, W));<br>
+ }<br>
+ I->replaceUsesOfWith(Op, W);<br>
+ }<br>
+ }<br>
+<br>
+ // Update the users outside of the loop to account for having left<br>
+ // shifts. They would normally be shifted right in the loop, so shift<br>
+ // them right after the loop exit.<br>
+ // Take advantage of the loop-closed SSA form, which has all the post-<br>
+ // loop values in phi nodes.<br>
+ IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());<br>
+ for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {<br>
+ if (!isa<PHINode>(P))<br>
+ break;<br>
+ auto *PN = cast<PHINode>(P);<br>
+ Value *U = PN->getIncomingValueForBlock(<wbr>LoopB);<br>
+ if (!Users.count(U))<br>
+ continue;<br>
+ Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType()<wbr>, IterCount));<br>
+ PN->replaceAllUsesWith(S);<br>
+ // The above RAUW will create<br>
+ // S = lshr S, IterCount<br>
+ // so we need to fix it back into<br>
+ // S = lshr PN, IterCount<br>
+ cast<User>(S)-><wbr>replaceUsesOfWith(S, PN);<br>
+ }<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+void PolynomialMultiplyRecognize::<wbr>cleanupLoopBody(BasicBlock *LoopB) {<br>
+ for (auto &I : *LoopB)<br>
+ if (Value *SV = SimplifyInstruction(&I, DL, &TLI, &DT))<br>
+ I.replaceAllUsesWith(SV);<br>
+<br>
+ for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {<br>
+ N = std::next(I);<br>
+ RecursivelyDeleteTriviallyDead<wbr>Instructions(&*I, &TLI);<br>
+ }<br>
+}<br>
+<br>
+<br>
+unsigned PolynomialMultiplyRecognize::<wbr>getInverseMxN(unsigned QP) {<br>
+ // Arrays of coefficients of Q and the inverse, C.<br>
+ // Q[i] = coefficient at x^i.<br>
+ std::array<char,32> Q, C;<br>
+<br>
+ for (unsigned i = 0; i < 32; ++i) {<br>
+ Q[i] = QP & 1;<br>
+ QP >>= 1;<br>
+ }<br>
+ assert(Q[0] == 1);<br>
+<br>
+ // Find C, such that<br>
+ // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1<br>
+ //<br>
+ // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the<br>
+ // operations * and + are & and ^ respectively.<br>
+ //<br>
+ // Find C[i] recursively, by comparing i-th coefficient in the product<br>
+ // with 0 (or 1 for i=0).<br>
+ //<br>
+ // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.<br>
+ C[0] = 1;<br>
+ for (unsigned i = 1; i < 32; ++i) {<br>
+ // Solve for C[i] in:<br>
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0<br>
+ // This is equivalent to<br>
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0<br>
+ // which is<br>
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]<br>
+ unsigned T = 0;<br>
+ for (unsigned j = 0; j < i; ++j)<br>
+ T = T ^ (C[j] & Q[i-j]);<br>
+ C[i] = T;<br>
+ }<br>
+<br>
+ unsigned QV = 0;<br>
+ for (unsigned i = 0; i < 32; ++i)<br>
+ if (C[i])<br>
+ QV |= (1 << i);<br>
+<br>
+ return QV;<br>
+}<br>
+<br>
+<br>
+Value *PolynomialMultiplyRecognize::<wbr>generate(BasicBlock::iterator At,<br>
+ ParsedValues &PV) {<br>
+ IRBuilder<> B(&*At);<br>
+ Module *M = At->getParent()->getParent()-><wbr>getParent();<br>
+ Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);<br>
+<br>
+ Value *P = PV.P, *Q = PV.Q, *P0 = P;<br>
+ unsigned IC = PV.IterCount;<br>
+<br>
+ if (PV.M != nullptr)<br>
+ P0 = P = B.CreateXor(P, PV.M);<br>
+<br>
+ // Create a bit mask to clear the high bits beyond IterCount.<br>
+ auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));<br>
+<br>
+ if (PV.IterCount != 32)<br>
+ P = B.CreateAnd(P, BMI);<br>
+<br>
+ if (PV.Inv) {<br>
+ auto *QI = dyn_cast<ConstantInt>(PV.Q);<br>
+ assert(QI && QI->getBitWidth() <= 32);<br>
+<br>
+ // Again, clearing bits beyond IterCount.<br>
+ unsigned M = (1 << PV.IterCount) - 1;<br>
+ unsigned Tmp = (QI->getZExtValue() | 1) & M;<br>
+ unsigned QV = getInverseMxN(Tmp) & M;<br>
+ auto *QVI = ConstantInt::get(QI->getType()<wbr>, QV);<br>
+ P = B.CreateCall(PMF, {P, QVI});<br>
+ P = B.CreateTrunc(P, QI->getType());<br>
+ if (IC != 32)<br>
+ P = B.CreateAnd(P, BMI);<br>
+ }<br>
+<br>
+ Value *R = B.CreateCall(PMF, {P, Q});<br>
+<br>
+ if (PV.M != nullptr)<br>
+ R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));<br>
+<br>
+ return R;<br>
+}<br>
+<br>
+<br>
+bool PolynomialMultiplyRecognize::<wbr>recognize() {<br>
+ // Restrictions:<br>
+ // - The loop must consist of a single block.<br>
+ // - The iteration count must be known at compile-time.<br>
+ // - The loop must have an induction variable starting from 0, and<br>
+ // incremented in each iteration of the loop.<br>
+ BasicBlock *LoopB = CurLoop->getHeader();<br>
+ if (LoopB != CurLoop->getLoopLatch())<br>
+ return false;<br>
+ BasicBlock *ExitB = CurLoop->getExitBlock();<br>
+ if (ExitB == nullptr)<br>
+ return false;<br>
+ BasicBlock *EntryB = CurLoop->getLoopPreheader();<br>
+ if (EntryB == nullptr)<br>
+ return false;<br>
+<br>
+ unsigned IterCount = 0;<br>
+ const SCEV *CT = SE.getBackedgeTakenCount(<wbr>CurLoop);<br>
+ if (isa<SCEVCouldNotCompute>(CT))<br>
+ return false;<br>
+ if (auto *CV = dyn_cast<SCEVConstant>(CT))<br>
+ IterCount = CV->getValue()->getZExtValue() + 1;<br>
+<br>
+ Value *CIV = getCountIV(LoopB);<br>
+ ParsedValues PV;<br>
+ PV.IterCount = IterCount;<br>
+<br>
+ // Test function to see if a given select instruction is a part of the<br>
+ // pmpy pattern. The argument PreScan set to "true" indicates that only<br>
+ // a preliminary scan is needed, "false" indicated an exact match.<br>
+ auto CouldBePmpy = [this, LoopB, EntryB, CIV, &PV] (bool PreScan)<br>
+ -> std::function<bool (Instruction &I)> {<br>
+ return [this, LoopB, EntryB, CIV, &PV, PreScan] (Instruction &I) -> bool {<br>
+ if (auto *SelI = dyn_cast<SelectInst>(&I))<br>
+ return scanSelect(SelI, LoopB, EntryB, CIV, PV, PreScan);<br>
+ return false;<br>
+ };<br>
+ };<br>
+ auto PreF = std::find_if(LoopB->begin(), LoopB->end(), CouldBePmpy(true));<br>
+ if (PreF == LoopB->end())<br>
+ return false;<br>
+<br>
+ if (!PV.Left) {<br>
+ convertShiftsToLeft(LoopB, ExitB, IterCount);<br>
+ cleanupLoopBody(LoopB);<br>
+ }<br>
+<br>
+ auto PostF = std::find_if(LoopB->begin(), LoopB->end(), CouldBePmpy(false));<br>
+ if (PostF == LoopB->end())<br>
+ return false;<br>
+<br>
+ DEBUG({<br>
+ StringRef PP = (PV.M ? "(P+M)" : "P");<br>
+ if (!PV.Inv)<br>
+ dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";<br>
+ else<br>
+ dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "<br>
+ << PP << "\n";<br>
+ dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";<br>
+ if (PV.M)<br>
+ dbgs() << " M:" << *PV.M << "\n";<br>
+ dbgs() << " Q:" << *PV.Q << "\n";<br>
+ dbgs() << " Iteration count:" << PV.IterCount << "\n";<br>
+ });<br>
+<br>
+ BasicBlock::iterator At(EntryB->getTerminator());<br>
+ Value *PM = generate(At, PV);<br>
+ if (PM == nullptr)<br>
+ return false;<br>
+<br>
+ if (PM->getType() != PV.Res->getType())<br>
+ PM = IRBuilder<>(&*At).<wbr>CreateIntCast(PM, PV.Res->getType(), false);<br>
+<br>
+ PV.Res->replaceAllUsesWith(PM)<wbr>;<br>
+ PV.Res->eraseFromParent();<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+unsigned HexagonLoopIdiomRecognize::<wbr>getStoreSizeInBytes(StoreInst *SI) {<br>
+ uint64_t SizeInBits = DL->getTypeSizeInBits(SI-><wbr>getValueOperand()->getType());<br>
+ assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&<br>
+ "Don't overflow unsigned.");<br>
+ return (unsigned)SizeInBits >> 3;<br>
+}<br>
+<br>
+<br>
+int HexagonLoopIdiomRecognize::<wbr>getSCEVStride(const SCEVAddRecExpr *S) {<br>
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S-><wbr>getOperand(1)))<br>
+ return SC->getAPInt().getSExtValue();<br>
+ return 0;<br>
+}<br>
+<br>
+<br>
+bool HexagonLoopIdiomRecognize::<wbr>isLegalStore(Loop *CurLoop, StoreInst *SI) {<br>
+ bool IsVolatile = false;<br>
+ if (SI->isVolatile() && HexagonVolatileMemcpy)<br>
+ IsVolatile = true;<br>
+ else if (!SI->isSimple())<br>
+ return false;<br>
+<br>
+ Value *StoredVal = SI->getValueOperand();<br>
+ Value *StorePtr = SI->getPointerOperand();<br>
+<br>
+ // Reject stores that are so large that they overflow an unsigned.<br>
+ uint64_t SizeInBits = DL->getTypeSizeInBits(<wbr>StoredVal->getType());<br>
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)<br>
+ return false;<br>
+<br>
+ // See if the pointer expression is an AddRec like {base,+,1} on the current<br>
+ // loop, which indicates a strided store. If we have something else, it's a<br>
+ // random store we can't handle.<br>
+ auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE-><wbr>getSCEV(StorePtr));<br>
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())<br>
+ return false;<br>
+<br>
+ // Check to see if the stride matches the size of the store. If so, then we<br>
+ // know that every byte is touched in the loop.<br>
+ int Stride = getSCEVStride(StoreEv);<br>
+ if (Stride == 0)<br>
+ return false;<br>
+ unsigned StoreSize = getStoreSizeInBytes(SI);<br>
+ if (StoreSize != unsigned(std::abs(Stride)))<br>
+ return false;<br>
+<br>
+ // The store must be feeding a non-volatile load.<br>
+ LoadInst *LI = dyn_cast<LoadInst>(SI-><wbr>getValueOperand());<br>
+ if (!LI || !LI->isSimple())<br>
+ return false;<br>
+<br>
+ // See if the pointer expression is an AddRec like {base,+,1} on the current<br>
+ // loop, which indicates a strided load. If we have something else, it's a<br>
+ // random load we can't handle.<br>
+ Value *LoadPtr = LI->getPointerOperand();<br>
+ auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE-><wbr>getSCEV(LoadPtr));<br>
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())<br>
+ return false;<br>
+<br>
+ // The store and load must share the same stride.<br>
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1))<br>
+ return false;<br>
+<br>
+ // Success. This store can be converted into a memcpy.<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+/// mayLoopAccessLocation - Return true if the specified loop might access the<br>
+/// specified pointer location, which is a loop-strided access. The 'Access'<br>
+/// argument specifies what the verboten forms of access are (read or write).<br>
+static bool<br>
+mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,<br>
+ const SCEV *BECount, unsigned StoreSize,<br>
+ AliasAnalysis &AA,<br>
+ SmallPtrSetImpl<Instruction *> &Ignored) {<br>
+ // Get the location that may be stored across the loop. Since the access<br>
+ // is strided positively through memory, we say that the modified location<br>
+ // starts at the pointer and has infinite size.<br>
+ uint64_t AccessSize = MemoryLocation::UnknownSize;<br>
+<br>
+ // If the loop iterates a fixed number of times, we can refine the access<br>
+ // size to be exactly the size of the memset, which is (BECount+1)*StoreSize<br>
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(<wbr>BECount))<br>
+ AccessSize = (BECst->getValue()-><wbr>getZExtValue() + 1) * StoreSize;<br>
+<br>
+ // TODO: For this to be really effective, we have to dive into the pointer<br>
+ // operand in the store. Store to &A[i] of 100 will always return may alias<br>
+ // with store of &A[100], we need to StoreLoc to be "A" with size of 100,<br>
+ // which will then no-alias a store to &A[100].<br>
+ MemoryLocation StoreLoc(Ptr, AccessSize);<br>
+<br>
+ for (auto *B : L->blocks())<br>
+ for (auto &I : *B)<br>
+ if (Ignored.count(&I) == 0 && (AA.getModRefInfo(&I, StoreLoc) & Access))<br>
+ return true;<br>
+<br>
+ return false;<br>
+}<br>
+<br>
+<br>
+void HexagonLoopIdiomRecognize::<wbr>collectStores(Loop *CurLoop, BasicBlock *BB,<br>
+ SmallVectorImpl<StoreInst*> &Stores) {<br>
+ Stores.clear();<br>
+ for (Instruction &I : *BB)<br>
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))<br>
+ if (isLegalStore(CurLoop, SI))<br>
+ Stores.push_back(SI);<br>
+}<br>
+<br>
+<br>
+bool HexagonLoopIdiomRecognize::<wbr>processCopyingStore(Loop *CurLoop,<br>
+ StoreInst *SI, const SCEV *BECount) {<br>
+ assert(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy) &&<br>
+ "Expected only non-volatile stores, or Hexagon-specific memcpy"<br>
+ "to volatile destination.");<br>
+<br>
+ Value *StorePtr = SI->getPointerOperand();<br>
+ auto *StoreEv = cast<SCEVAddRecExpr>(SE-><wbr>getSCEV(StorePtr));<br>
+ unsigned Stride = getSCEVStride(StoreEv);<br>
+ unsigned StoreSize = getStoreSizeInBytes(SI);<br>
+ if (Stride != StoreSize)<br>
+ return false;<br>
+<br>
+ // See if the pointer expression is an AddRec like {base,+,1} on the current<br>
+ // loop, which indicates a strided load. If we have something else, it's a<br>
+ // random load we can't handle.<br>
+ LoadInst *LI = dyn_cast<LoadInst>(SI-><wbr>getValueOperand());<br>
+ auto *LoadEv = cast<SCEVAddRecExpr>(SE-><wbr>getSCEV(LI->getPointerOperand(<wbr>)));<br>
+<br>
+ // The trip count of the loop and the base pointer of the addrec SCEV is<br>
+ // guaranteed to be loop invariant, which means that it should dominate the<br>
+ // header. This allows us to insert code for it in the preheader.<br>
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();<br>
+ Instruction *ExpPt = Preheader->getTerminator();<br>
+ IRBuilder<> Builder(ExpPt);<br>
+ SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");<br>
+<br>
+ Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());<br>
+<br>
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn<br>
+ // this into a memcpy/memmove in the loop preheader now if we want. However,<br>
+ // this would be unsafe to do if there is anything else in the loop that may<br>
+ // read or write the memory region we're storing to. For memcpy, this<br>
+ // includes the load that feeds the stores. Check for an alias by generating<br>
+ // the base address and checking everything.<br>
+ Value *StoreBasePtr = Expander.expandCodeFor(<wbr>StoreEv->getStart(),<br>
+ Builder.getInt8PtrTy(SI-><wbr>getPointerAddressSpace()), ExpPt);<br>
+ Value *LoadBasePtr = nullptr;<br>
+<br>
+ bool Overlap = false;<br>
+ bool DestVolatile = SI->isVolatile();<br>
+ Type *BECountTy = BECount->getType();<br>
+<br>
+ if (DestVolatile) {<br>
+ // The trip count must fit in i32, since it is the type of the "num_words"<br>
+ // argument to hexagon_memcpy_forward_<wbr>vp4cp4n2.<br>
+ if (StoreSize != 4 || DL->getTypeSizeInBits(<wbr>BECountTy) > 32) {<br>
+CleanupAndExit:<br>
+ // If we generated new code for the base pointer, clean up.<br>
+ Expander.clear();<br>
+ if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {<br>
+ RecursivelyDeleteTriviallyDead<wbr>Instructions(StoreBasePtr, TLI);<br>
+ StoreBasePtr = nullptr;<br>
+ }<br>
+ if (LoadBasePtr) {<br>
+ RecursivelyDeleteTriviallyDead<wbr>Instructions(LoadBasePtr, TLI);<br>
+ LoadBasePtr = nullptr;<br>
+ }<br>
+ return false;<br>
+ }<br>
+ }<br>
+<br>
+ SmallPtrSet<Instruction*, 2> Ignore1;<br>
+ Ignore1.insert(SI);<br>
+ if (mayLoopAccessLocation(<wbr>StoreBasePtr, MRI_ModRef, CurLoop, BECount,<br>
+ StoreSize, *AA, Ignore1)) {<br>
+ // Check if the load is the offending instruction.<br>
+ Ignore1.insert(LI);<br>
+ if (mayLoopAccessLocation(<wbr>StoreBasePtr, MRI_ModRef, CurLoop, BECount,<br>
+ StoreSize, *AA, Ignore1)) {<br>
+ // Still bad. Nothing we can do.<br>
+ goto CleanupAndExit;<br>
+ }<br>
+ // It worked with the load ignored.<br>
+ Overlap = true;<br>
+ }<br>
+<br>
+ if (!Overlap) {<br>
+ if (DisableMemcpyIdiom || !HasMemcpy)<br>
+ goto CleanupAndExit;<br>
+ } else {<br>
+ // Don't generate memmove if this function will be inlined. This is<br>
+ // because the caller will undergo this transformation after inlining.<br>
+ Function *Func = CurLoop->getHeader()-><wbr>getParent();<br>
+ if (Func->hasFnAttribute(<wbr>Attribute::AlwaysInline))<br>
+ goto CleanupAndExit;<br>
+<br>
+ // In case of a memmove, the call to memmove will be executed instead<br>
+ // of the loop, so we need to make sure that there is nothing else in<br>
+ // the loop than the load, store and instructions that these two depend<br>
+ // on.<br>
+ SmallVector<Instruction*,2> Insts;<br>
+ Insts.push_back(SI);<br>
+ Insts.push_back(LI);<br>
+ if (!coverLoop(CurLoop, Insts))<br>
+ goto CleanupAndExit;<br>
+<br>
+ if (DisableMemmoveIdiom || !HasMemmove)<br>
+ goto CleanupAndExit;<br>
+ bool IsNested = CurLoop->getParentLoop() != 0;<br>
+ if (IsNested && OnlyNonNestedMemmove)<br>
+ goto CleanupAndExit;<br>
+ }<br>
+<br>
+ // For a memcpy, we have to make sure that the input array is not being<br>
+ // mutated by the loop.<br>
+ LoadBasePtr = Expander.expandCodeFor(LoadEv-<wbr>>getStart(),<br>
+ Builder.getInt8PtrTy(LI-><wbr>getPointerAddressSpace()), ExpPt);<br>
+<br>
+ SmallPtrSet<Instruction*, 2> Ignore2;<br>
+ Ignore2.insert(SI);<br>
+ if (mayLoopAccessLocation(<wbr>LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,<br>
+ *AA, Ignore2))<br>
+ goto CleanupAndExit;<br>
+<br>
+ // Check the stride.<br>
+ bool StridePos = getSCEVStride(LoadEv) >= 0;<br>
+<br>
+ // Currently, the volatile memcpy only emulates traversing memory forward.<br>
+ if (!StridePos && DestVolatile)<br>
+ goto CleanupAndExit;<br>
+<br>
+ bool RuntimeCheck = (Overlap || DestVolatile);<br>
+<br>
+ BasicBlock *ExitB;<br>
+ if (RuntimeCheck) {<br>
+ // The runtime check needs a single exit block.<br>
+ SmallVector<BasicBlock*, 8> ExitBlocks;<br>
+ CurLoop->getUniqueExitBlocks(<wbr>ExitBlocks);<br>
+ if (ExitBlocks.size() != 1)<br>
+ goto CleanupAndExit;<br>
+ ExitB = ExitBlocks[0];<br>
+ }<br>
+<br>
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to<br>
+ // pointer size if it isn't already.<br>
+ LLVMContext &Ctx = SI->getContext();<br>
+ BECount = SE->getTruncateOrZeroExtend(<wbr>BECount, IntPtrTy);<br>
+ unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment());<br>
+ DebugLoc DLoc = SI->getDebugLoc();<br>
+<br>
+ const SCEV *NumBytesS =<br>
+ SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);<br>
+ if (StoreSize != 1)<br>
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),<br>
+ SCEV::FlagNUW);<br>
+ Value *NumBytes = Expander.expandCodeFor(<wbr>NumBytesS, IntPtrTy, ExpPt);<br>
+ if (Instruction *In = dyn_cast<Instruction>(<wbr>NumBytes))<br>
+ if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))<br>
+ NumBytes = Simp;<br>
+<br>
+ CallInst *NewCall;<br>
+<br>
+ if (RuntimeCheck) {<br>
+ unsigned Threshold = RuntimeMemSizeThreshold;<br>
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(<wbr>NumBytes)) {<br>
+ uint64_t C = CI->getZExtValue();<br>
+ if (Threshold != 0 && C < Threshold)<br>
+ goto CleanupAndExit;<br>
+ if (C < CompileTimeMemSizeThreshold)<br>
+ goto CleanupAndExit;<br>
+ }<br>
+<br>
+ BasicBlock *Header = CurLoop->getHeader();<br>
+ Function *Func = Header->getParent();<br>
+ Loop *ParentL = LF->getLoopFor(Preheader);<br>
+ StringRef HeaderName = Header->getName();<br>
+<br>
+ // Create a new (empty) preheader, and update the PHI nodes in the<br>
+ // header to use the new preheader.<br>
+ BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".<a href="http://rtli.ph" rel="noreferrer" target="_blank">rtli.ph</a>",<br>
+ Func, Header);<br>
+ if (ParentL)<br>
+ ParentL->addBasicBlockToLoop(<wbr>NewPreheader, *LF);<br>
+ IRBuilder<>(NewPreheader).<wbr>CreateBr(Header);<br>
+ for (auto &In : *Header) {<br>
+ PHINode *PN = dyn_cast<PHINode>(&In);<br>
+ if (!PN)<br>
+ break;<br>
+ int bx = PN->getBasicBlockIndex(<wbr>Preheader);<br>
+ if (bx >= 0)<br>
+ PN->setIncomingBlock(bx, NewPreheader);<br>
+ }<br>
+ DT->addNewBlock(NewPreheader, Preheader);<br>
+ DT->changeImmediateDominator(<wbr>Header, NewPreheader);<br>
+<br>
+ // Check for safe conditions to execute memmove.<br>
+ // If stride is positive, copying things from higher to lower addresses<br>
+ // is equivalent to memmove. For negative stride, it's the other way<br>
+ // around. Copying forward in memory with positive stride may not be<br>
+ // same as memmove since we may be copying values that we just stored<br>
+ // in some previous iteration.<br>
+ Value *LA = Builder.CreatePtrToInt(<wbr>LoadBasePtr, IntPtrTy);<br>
+ Value *SA = Builder.CreatePtrToInt(<wbr>StoreBasePtr, IntPtrTy);<br>
+ Value *LowA = StridePos ? SA : LA;<br>
+ Value *HighA = StridePos ? LA : SA;<br>
+ Value *CmpA = Builder.CreateICmpULT(LowA, HighA);<br>
+ Value *Cond = CmpA;<br>
+<br>
+ // Check for distance between pointers.<br>
+ Value *Dist = Builder.CreateSub(HighA, LowA);<br>
+ Value *CmpD = Builder.CreateICmpSLT(<wbr>NumBytes, Dist);<br>
+ Value *CmpEither = Builder.CreateOr(Cond, CmpD);<br>
+ Cond = CmpEither;<br>
+<br>
+ if (Threshold != 0) {<br>
+ Type *Ty = NumBytes->getType();<br>
+ Value *Thr = ConstantInt::get(Ty, Threshold);<br>
+ Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);<br>
+ Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);<br>
+ Cond = CmpBoth;<br>
+ }<br>
+ BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",<br>
+ Func, NewPreheader);<br>
+ if (ParentL)<br>
+ ParentL->addBasicBlockToLoop(<wbr>MemmoveB, *LF);<br>
+ Instruction *OldT = Preheader->getTerminator();<br>
+ Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);<br>
+ OldT->eraseFromParent();<br>
+ Preheader->setName(Preheader-><wbr>getName()+".old");<br>
+ DT->addNewBlock(MemmoveB, Preheader);<br>
+ // Find the new immediate dominator of the exit block.<br>
+ BasicBlock *ExitD = Preheader;<br>
+ for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {<br>
+ BasicBlock *PB = *PI;<br>
+ ExitD = DT-><wbr>findNearestCommonDominator(<wbr>ExitD, PB);<br>
+ if (!ExitD)<br>
+ break;<br>
+ }<br>
+ // If the prior immediate dominator of ExitB was dominated by the<br>
+ // old preheader, then the old preheader becomes the new immediate<br>
+ // dominator. Otherwise don't change anything (because the newly<br>
+ // added blocks are dominated by the old preheader).<br>
+ if (ExitD && DT->dominates(Preheader, ExitD)) {<br>
+ DomTreeNode *BN = DT->getNode(ExitB);<br>
+ DomTreeNode *DN = DT->getNode(ExitD);<br>
+ BN->setIDom(DN);<br>
+ }<br>
+<br>
+ // Add a call to memmove to the conditional block.<br>
+ IRBuilder<> CondBuilder(MemmoveB);<br>
+ CondBuilder.CreateBr(ExitB);<br>
+ CondBuilder.SetInsertPoint(<wbr>MemmoveB->getTerminator());<br>
+<br>
+ if (DestVolatile) {<br>
+ Type *Int32Ty = Type::getInt32Ty(Ctx);<br>
+ Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);<br>
+ Type *VoidTy = Type::getVoidTy(Ctx);<br>
+ Module *M = Func->getParent();<br>
+ Constant *CF = M->getOrInsertFunction(<wbr>HexagonVolatileMemcpyName, VoidTy,<br>
+ Int32PtrTy, Int32PtrTy, Int32Ty,<br>
+ nullptr);<br>
+ Function *Fn = cast<Function>(CF);<br>
+ Fn->setLinkage(Function::<wbr>ExternalLinkage);<br>
+<br>
+ const SCEV *OneS = SE->getConstant(Int32Ty, 1);<br>
+ const SCEV *BECount32 = SE->getTruncateOrZeroExtend(<wbr>BECount, Int32Ty);<br>
+ const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);<br>
+ Value *NumWords = Expander.expandCodeFor(<wbr>NumWordsS, Int32Ty,<br>
+ MemmoveB->getTerminator());<br>
+ if (Instruction *In = dyn_cast<Instruction>(<wbr>NumWords))<br>
+ if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))<br>
+ NumWords = Simp;<br>
+<br>
+ Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)<br>
+ ? StoreBasePtr<br>
+ : CondBuilder.CreateBitCast(<wbr>StoreBasePtr, Int32PtrTy);<br>
+ Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)<br>
+ ? LoadBasePtr<br>
+ : CondBuilder.CreateBitCast(<wbr>LoadBasePtr, Int32PtrTy);<br>
+ NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});<br>
+ } else {<br>
+ NewCall = CondBuilder.CreateMemMove(<wbr>StoreBasePtr, LoadBasePtr,<br>
+ NumBytes, Alignment);<br>
+ }<br>
+ } else {<br>
+ NewCall = Builder.CreateMemCpy(<wbr>StoreBasePtr, LoadBasePtr,<br>
+ NumBytes, Alignment);<br>
+ // Okay, the memcpy has been formed. Zap the original store and<br>
+ // anything that feeds into it.<br>
+ RecursivelyDeleteTriviallyDead<wbr>Instructions(SI, TLI);<br>
+ }<br>
+<br>
+ NewCall->setDebugLoc(DLoc);<br>
+<br>
+ DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")<br>
+ << *NewCall << "\n"<br>
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"<br>
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+<br>
+// \brief Check if the instructions in Insts, together with their dependencies<br>
+// cover the loop in the sense that the loop could be safely eliminated once<br>
+// the instructions in Insts are removed.<br>
+bool HexagonLoopIdiomRecognize::<wbr>coverLoop(Loop *L,<br>
+ SmallVectorImpl<Instruction*> &Insts) const {<br>
+ SmallSet<BasicBlock*,8> LoopBlocks;<br>
+ for (auto *B : L->blocks())<br>
+ LoopBlocks.insert(B);<br>
+<br>
+ SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());<br>
+<br>
+ // Collect all instructions from the loop that the instructions in Insts<br>
+ // depend on (plus their dependencies, etc.). These instructions will<br>
+ // constitute the expression trees that feed those in Insts, but the trees<br>
+ // will be limited only to instructions contained in the loop.<br>
+ for (unsigned i = 0; i < Worklist.size(); ++i) {<br>
+ Instruction *In = Worklist[i];<br>
+ for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {<br>
+ Instruction *OpI = dyn_cast<Instruction>(I);<br>
+ if (!OpI)<br>
+ continue;<br>
+ BasicBlock *PB = OpI->getParent();<br>
+ if (!LoopBlocks.count(PB))<br>
+ continue;<br>
+ Worklist.insert(OpI);<br>
+ }<br>
+ }<br>
+<br>
+ // Scan all instructions in the loop, if any of them have a user outside<br>
+ // of the loop, or outside of the expressions collected above, then either<br>
+ // the loop has a side-effect visible outside of it, or there are<br>
+ // instructions in it that are not involved in the original set Insts.<br>
+ for (auto *B : L->blocks()) {<br>
+ for (auto &In : *B) {<br>
+ if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))<br>
+ continue;<br>
+ if (!Worklist.count(&In) && In.mayHaveSideEffects())<br>
+ return false;<br>
+ for (const auto &K : In.users()) {<br>
+ Instruction *UseI = dyn_cast<Instruction>(K);<br>
+ if (!UseI)<br>
+ continue;<br>
+ BasicBlock *UseB = UseI->getParent();<br>
+ if (LF->getLoopFor(UseB) != L)<br>
+ return false;<br>
+ }<br>
+ }<br>
+ }<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop<br>
+/// with the specified backedge count. This block is known to be in the current<br>
+/// loop and not in any subloops.<br>
+bool HexagonLoopIdiomRecognize::<wbr>runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,<br>
+ const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {<br>
+ // We can only promote stores in this block if they are unconditionally<br>
+ // executed in the loop. For a block to be unconditionally executed, it has<br>
+ // to dominate all the exit blocks of the loop. Verify this now.<br>
+ auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {<br>
+ return DT->dominates(BB, EB);<br>
+ };<br>
+ if (!std::all_of(ExitBlocks.<wbr>begin(), ExitBlocks.end(), DominatedByBB))<br>
+ return false;<br>
+<br>
+ bool MadeChange = false;<br>
+ // Look for store instructions, which may be optimized to memset/memcpy.<br>
+ SmallVector<StoreInst*,8> Stores;<br>
+ collectStores(CurLoop, BB, Stores);<br>
+<br>
+ // Optimize the store into a memcpy, if it feeds an similarly strided load.<br>
+ for (auto &SI : Stores)<br>
+ MadeChange |= processCopyingStore(CurLoop, SI, BECount);<br>
+<br>
+ return MadeChange;<br>
+}<br>
+<br>
+<br>
+bool HexagonLoopIdiomRecognize::<wbr>runOnCountableLoop(Loop *L) {<br>
+ PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);<br>
+ if (PMR.recognize())<br>
+ return true;<br>
+<br>
+ if (!HasMemcpy && !HasMemmove)<br>
+ return false;<br>
+<br>
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);<br>
+ assert(!isa<<wbr>SCEVCouldNotCompute>(BECount) &&<br>
+ "runOnCountableLoop() called on a loop without a predictable"<br>
+ "backedge-taken count");<br>
+<br>
+ SmallVector<BasicBlock *, 8> ExitBlocks;<br>
+ L->getUniqueExitBlocks(<wbr>ExitBlocks);<br>
+<br>
+ bool Changed = false;<br>
+<br>
+ // Scan all the blocks in the loop that are not in subloops.<br>
+ for (auto *BB : L->getBlocks()) {<br>
+ // Ignore blocks in subloops.<br>
+ if (LF->getLoopFor(BB) != L)<br>
+ continue;<br>
+ Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);<br>
+ }<br>
+<br>
+ return Changed;<br>
+}<br>
+<br>
+<br>
+bool HexagonLoopIdiomRecognize::<wbr>runOnLoop(Loop *L, LPPassManager &LPM) {<br>
+ const Module &M = *L->getHeader()->getParent()-><wbr>getParent();<br>
+ if (Triple(M.getTargetTriple()).<wbr>getArch() != Triple::hexagon)<br>
+ return false;<br>
+<br>
+ if (skipLoop(L))<br>
+ return false;<br>
+<br>
+ // If the loop could not be converted to canonical form, it must have an<br>
+ // indirectbr in it, just give up.<br>
+ if (!L->getLoopPreheader())<br>
+ return false;<br>
+<br>
+ // Disable loop idiom recognition if the function's name is a common idiom.<br>
+ StringRef Name = L->getHeader()->getParent()-><wbr>getName();<br>
+ if (Name == "memset" || Name == "memcpy" || Name == "memmove")<br>
+ return false;<br>
+<br>
+ AA = &getAnalysis<<wbr>AAResultsWrapperPass>().<wbr>getAAResults();<br>
+ DL = &L->getHeader()->getModule()-><wbr>getDataLayout();<br>
+ DT = &getAnalysis<<wbr>DominatorTreeWrapperPass>().<wbr>getDomTree();<br>
+ LF = &getAnalysis<<wbr>LoopInfoWrapperPass>().<wbr>getLoopInfo();<br>
+ TLI = &getAnalysis<<wbr>TargetLibraryInfoWrapperPass>(<wbr>).getTLI();<br>
+ SE = &getAnalysis<<wbr>ScalarEvolutionWrapperPass>().<wbr>getSE();<br>
+<br>
+ HasMemcpy = TLI->has(LibFunc_memcpy);<br>
+ HasMemmove = TLI->has(LibFunc_memmove);<br>
+<br>
+ if (SE-><wbr>hasLoopInvariantBackedgeTakenC<wbr>ount(L))<br>
+ return runOnCountableLoop(L);<br>
+ return false;<br>
+}<br>
+<br>
+<br>
+Pass *llvm::<wbr>createHexagonLoopIdiomPass() {<br>
+ return new HexagonLoopIdiomRecognize();<br>
+}<br>
+<br>
<br>
Modified: llvm/trunk/lib/Target/Hexagon/<wbr>HexagonTargetMachine.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.cpp?rev=293213&r1=293212&r2=293213&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>Hexagon/HexagonTargetMachine.<wbr>cpp?rev=293213&r1=293212&r2=<wbr>293213&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/Hexagon/<wbr>HexagonTargetMachine.cpp (original)<br>
+++ llvm/trunk/lib/Target/Hexagon/<wbr>HexagonTargetMachine.cpp Thu Jan 26 15:41:10 2017<br>
@@ -24,6 +24,7 @@<br>
#include "llvm/Support/CommandLine.h"<br>
#include "llvm/Support/TargetRegistry.<wbr>h"<br>
#include "llvm/Transforms/Scalar.h"<br>
+#include "llvm/Transforms/IPO/<wbr>PassManagerBuilder.h"<br>
<br>
using namespace llvm;<br>
<br>
@@ -98,11 +99,6 @@ static cl::opt<bool> EnableVectorPrint("<br>
extern "C" int HexagonTargetMachineModule;<br>
int HexagonTargetMachineModule = 0;<br>
<br>
-extern "C" void LLVMInitializeHexagonTarget() {<br>
- // Register the target.<br>
- RegisterTargetMachine<<wbr>HexagonTargetMachine> X(getTheHexagonTarget());<br>
-}<br>
-<br>
static ScheduleDAGInstrs *createVLIWMachineSched(<wbr>MachineSchedContext *C) {<br>
return new VLIWMachineScheduler(C, make_unique<<wbr>ConvergingVLIWScheduler>());<br>
}<br>
@@ -114,6 +110,8 @@ SchedCustomRegistry("hexagon", "Run Hexa<br>
namespace llvm {<br>
extern char &HexagonExpandCondsetsID;<br>
void initializeHexagonExpandCondset<wbr>sPass(PassRegistry&);<br>
+ void initializeHexagonLoopIdiomReco<wbr>gnizePass(PassRegistry&);<br>
+ Pass *createHexagonLoopIdiomPass();<br>
<br>
FunctionPass *createHexagonBitSimplify();<br>
FunctionPass *<wbr>createHexagonBranchRelaxation(<wbr>);<br>
@@ -150,6 +148,12 @@ static Reloc::Model getEffectiveRelocMod<br>
return *RM;<br>
}<br>
<br>
+extern "C" void LLVMInitializeHexagonTarget() {<br>
+ // Register the target.<br>
+ RegisterTargetMachine<<wbr>HexagonTargetMachine> X(getTheHexagonTarget());<br>
+ initializeHexagonLoopIdiomReco<wbr>gnizePass(*PassRegistry::<wbr>getPassRegistry());<br>
+}<br>
+<br>
HexagonTargetMachine::<wbr>HexagonTargetMachine(const Target &T, const Triple &TT,<br>
StringRef CPU, StringRef FS,<br>
const TargetOptions &Options,<br>
@@ -196,6 +200,14 @@ HexagonTargetMachine::<wbr>getSubtargetImpl(c<br>
return I.get();<br>
}<br>
<br>
+void HexagonTargetMachine::<wbr>adjustPassManager(<wbr>PassManagerBuilder &PMB) {<br>
+ PMB.addExtension(<br>
+ PassManagerBuilder::EP_<wbr>LateLoopOptimizations,<br>
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {<br>
+ PM.add(<wbr>createHexagonLoopIdiomPass());<br>
+ });<br>
+}<br>
+<br>
TargetIRAnalysis HexagonTargetMachine::<wbr>getTargetIRAnalysis() {<br>
return TargetIRAnalysis([this](const Function &F) {<br>
return TargetTransformInfo(<wbr>HexagonTTIImpl(this, F));<br>
<br>
Modified: llvm/trunk/lib/Target/Hexagon/<wbr>HexagonTargetMachine.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetMachine.h?rev=293213&r1=293212&r2=293213&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>Hexagon/HexagonTargetMachine.<wbr>h?rev=293213&r1=293212&r2=<wbr>293213&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/Hexagon/<wbr>HexagonTargetMachine.h (original)<br>
+++ llvm/trunk/lib/Target/Hexagon/<wbr>HexagonTargetMachine.h Thu Jan 26 15:41:10 2017<br>
@@ -37,6 +37,7 @@ public:<br>
<br>
static unsigned getModuleMatchQuality(const Module &M);<br>
<br>
+ void adjustPassManager(<wbr>PassManagerBuilder &PMB) override;<br>
TargetPassConfig *createPassConfig(<wbr>PassManagerBase &PM) override;<br>
TargetIRAnalysis getTargetIRAnalysis() override;<br>
<br>
<br>
Added: llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/hexagon-<wbr>memmove1.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove1.ll?rev=293213&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/Hexagon/loop-idiom/<wbr>hexagon-memmove1.ll?rev=<wbr>293213&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/hexagon-<wbr>memmove1.ll (added)<br>
+++ llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/hexagon-<wbr>memmove1.ll Thu Jan 26 15:41:10 2017<br>
@@ -0,0 +1,36 @@<br>
+; Check for recognizing the "memmove" idiom.<br>
+; RUN: opt -basicaa -hexagon-loop-idiom -S -mtriple hexagon-unknown-elf < %s \<br>
+; RUN: | FileCheck %s<br>
+; CHECK: call void @llvm.memmove<br>
+<br>
+; Function Attrs: norecurse nounwind<br>
+define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %n) #0 {<br>
+entry:<br>
+ %cmp1 = icmp sgt i32 %n, 0<br>
+ br i1 %cmp1, label %for.body.preheader, label %for.end<br>
+<br>
+for.body.preheader: ; preds = %entry<br>
+ %arrayidx.gep = getelementptr i32, i32* %B, i32 0<br>
+ %arrayidx1.gep = getelementptr i32, i32* %A, i32 0<br>
+ br label %for.body<br>
+<br>
+for.body: ; preds = %for.body.preheader, %for.body<br>
+ %arrayidx.phi = phi i32* [ %arrayidx.gep, %for.body.preheader ], [ %arrayidx.inc, %for.body ]<br>
+ %arrayidx1.phi = phi i32* [ %arrayidx1.gep, %for.body.preheader ], [ %arrayidx1.inc, %for.body ]<br>
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]<br>
+ %0 = load i32, i32* %arrayidx.phi, align 4<br>
+ store i32 %0, i32* %arrayidx1.phi, align 4<br>
+ %inc = add nuw nsw i32 %i.02, 1<br>
+ %exitcond = icmp ne i32 %inc, %n<br>
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1<br>
+ %arrayidx1.inc = getelementptr i32, i32* %arrayidx1.phi, i32 1<br>
+ br i1 %exitcond, label %for.body, label %for.end.loopexit<br>
+<br>
+for.end.loopexit: ; preds = %for.body<br>
+ br label %for.end<br>
+<br>
+for.end: ; preds = %for.end.loopexit, %entry<br>
+ ret void<br>
+}<br>
+<br>
+attributes #0 = { nounwind }<br>
<br>
Added: llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/hexagon-<wbr>memmove2.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll?rev=293213&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/Hexagon/loop-idiom/<wbr>hexagon-memmove2.ll?rev=<wbr>293213&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/hexagon-<wbr>memmove2.ll (added)<br>
+++ llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/hexagon-<wbr>memmove2.ll Thu Jan 26 15:41:10 2017<br>
@@ -0,0 +1,36 @@<br>
+; RUN: opt -basicaa -hexagon-loop-idiom -S -mtriple hexagon-unknown-elf < %s \<br>
+; RUN: | FileCheck %s<br>
+<br>
+define void @PR14241(i32* %s, i64 %size) #0 {<br>
+; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught<br>
+; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy<br>
+; instead of a memmove. If we get the memmove transform back, this will catch<br>
+; regressions.<br>
+;<br>
+; CHECK-LABEL: @PR14241(<br>
+<br>
+entry:<br>
+ %end.idx = add i64 %size, -1<br>
+ %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx<br>
+ br label %while.body<br>
+; CHECK-NOT: memcpy<br>
+; CHECK: memmove<br>
+<br>
+while.body:<br>
+ %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]<br>
+ %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1<br>
+ %val = load i32, i32* %src.ptr, align 4<br>
+; CHECK: load<br>
+ %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0<br>
+ store i32 %val, i32* %dst.ptr, align 4<br>
+; CHECK: store<br>
+ %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1<br>
+ %cmp = icmp eq i32* %next.ptr, %end.ptr<br>
+ br i1 %cmp, label %exit, label %while.body<br>
+<br>
+exit:<br>
+ ret void<br>
+; CHECK: ret void<br>
+}<br>
+<br>
+attributes #0 = { nounwind }<br>
<br>
Added: llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/lcssa.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/lcssa.ll?rev=293213&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/Hexagon/loop-idiom/<wbr>lcssa.ll?rev=293213&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/lcssa.ll (added)<br>
+++ llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/lcssa.ll Thu Jan 26 15:41:10 2017<br>
@@ -0,0 +1,46 @@<br>
+; RUN: opt -hexagon-loop-idiom -loop-deletion -gvn -S < %s<br>
+; REQUIRES: asserts<br>
+<br>
+; This tests that the HexagonLoopIdiom pass does not mark LCSSA information<br>
+; as preserved. The pass calls SimplifyInstruction in a couple of places,<br>
+; which can invalidate LCSSA. Specifically, the uses of a LCSSA phi variable<br>
+; are replaced by the incoming value.<br>
+<br>
+define hidden void @test() local_unnamed_addr #0 {<br>
+entry:<br>
+ br label %if.then63<br>
+<br>
+if.then63:<br>
+ br i1 undef, label %do.body311, label %if.end375<br>
+<br>
+do.body311:<br>
+ br i1 undef, label %do.end318, label %do.body311<br>
+<br>
+do.end318:<br>
+ br i1 undef, label %if.end322, label %if.end375<br>
+<br>
+if.end322:<br>
+ %sub325 = sub i32 undef, undef<br>
+ br i1 undef, label %do.end329, label %do.body311<br>
+<br>
+do.end329:<br>
+ %sub325.lcssa = phi i32 [ %sub325, %if.end322 ]<br>
+ br label %do.body330<br>
+<br>
+do.body330:<br>
+ %row_width.7 = phi i32 [ %sub325.lcssa, %do.end329 ], [ %dec334, %do.body330 ]<br>
+ %sp.5 = phi i8* [ undef, %do.end329 ], [ %incdec.ptr331, %do.body330 ]<br>
+ %dp.addr.5 = phi i8* [ undef, %do.end329 ], [ %incdec.ptr332, %do.body330 ]<br>
+ %0 = load i8, i8* %sp.5, align 1<br>
+ store i8 %0, i8* %dp.addr.5, align 1<br>
+ %incdec.ptr332 = getelementptr inbounds i8, i8* %dp.addr.5, i32 1<br>
+ %incdec.ptr331 = getelementptr inbounds i8, i8* %sp.5, i32 1<br>
+ %dec334 = add i32 %row_width.7, -1<br>
+ %cmp335 = icmp eq i32 %dec334, 0<br>
+ br i1 %cmp335, label %if.end375, label %do.body330<br>
+<br>
+if.end375:<br>
+ ret void<br>
+}<br>
+<br>
+attributes #0 = { nounwind }<br>
<br>
Added: llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/nullptr-<wbr>crash.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/nullptr-crash.ll?rev=293213&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/Hexagon/loop-idiom/<wbr>nullptr-crash.ll?rev=293213&<wbr>view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/nullptr-<wbr>crash.ll (added)<br>
+++ llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/nullptr-<wbr>crash.ll Thu Jan 26 15:41:10 2017<br>
@@ -0,0 +1,24 @@<br>
+; RUN: opt -basicaa -hexagon-loop-idiom -mtriple hexagon-unknown-elf < %s<br>
+; REQUIRES: asserts<br>
+<br>
+target triple = "hexagon"<br>
+<br>
+; Function Attrs: nounwind<br>
+define void @fred(i8 zeroext %L) #0 {<br>
+entry:<br>
+ br i1 undef, label %if.end53, label %while.body37<br>
+<br>
+while.body37: ; preds = %while.body37, %entry<br>
+ %i.121 = phi i32 [ %inc46, %while.body37 ], [ 0, %entry ]<br>
+ %shl = shl i32 1, %i.121<br>
+ %and39 = and i32 %shl, undef<br>
+ %tobool40 = icmp eq i32 %and39, 0<br>
+ %inc46 = add nuw nsw i32 %i.121, 1<br>
+ %storemerge = select i1 %tobool40, i8 %L, i8 0<br>
+ br i1 undef, label %while.body37, label %if.end53<br>
+<br>
+if.end53: ; preds = %while.body37, %entry<br>
+ ret void<br>
+}<br>
+<br>
+attributes #0 = { nounwind }<br>
<br>
Added: llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/pmpy.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/loop-idiom/pmpy.ll?rev=293213&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/Hexagon/loop-idiom/<wbr>pmpy.ll?rev=293213&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/pmpy.ll (added)<br>
+++ llvm/trunk/test/CodeGen/<wbr>Hexagon/loop-idiom/pmpy.ll Thu Jan 26 15:41:10 2017<br>
@@ -0,0 +1,33 @@<br>
+; RUN: opt -hexagon-loop-idiom < %s -mtriple=hexagon-unknown-<wbr>unknown -S \<br>
+; RUN: | FileCheck %s<br>
+<br>
+target triple = "hexagon"<br>
+<br>
+; CHECK: define i64 @basic_pmpy<br>
+; CHECK: llvm.hexagon.M4.pmpyw<br>
+define i64 @basic_pmpy(i32 %P, i32 %Q) #0 {<br>
+entry:<br>
+ %conv = zext i32 %Q to i64<br>
+ br label %for.body<br>
+<br>
+for.body: ; preds = %entry, %for.body<br>
+ %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]<br>
+ %R.06 = phi i64 [ 0, %entry ], [ %xor.R.06, %for.body ]<br>
+ %shl = shl i32 1, %i.07<br>
+ %and = and i32 %shl, %P<br>
+ %tobool = icmp eq i32 %and, 0<br>
+ %sh_prom = zext i32 %i.07 to i64<br>
+ %shl1 = shl i64 %conv, %sh_prom<br>
+ %xor = xor i64 %shl1, %R.06<br>
+ %xor.R.06 = select i1 %tobool, i64 %R.06, i64 %xor<br>
+ %inc = add nuw nsw i32 %i.07, 1<br>
+ %exitcond = icmp ne i32 %inc, 32<br>
+ br i1 %exitcond, label %for.body, label %for.end<br>
+<br>
+for.end: ; preds = %for.body<br>
+ %R.1.lcssa = phi i64 [ %xor.R.06, %for.body ]<br>
+ ret i64 %R.1.lcssa<br>
+}<br>
+<br>
+attributes #0 = { nounwind }<br>
+<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>