[llvm] r268583 - clang-format some files in preparation for upcoming patch reviews.
Dehao Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed May 4 17:54:54 PDT 2016
Author: dehao
Date: Wed May 4 19:54:54 2016
New Revision: 268583
URL: http://llvm.org/viewvc/llvm-project?rev=268583&view=rev
Log:
clang-format some files in preparation for upcoming patch reviews.
Modified:
llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
llvm/trunk/lib/Transforms/Scalar/LICM.cpp
llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
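
Every hunk below is a pure reformatting produced by clang-format with the LLVM
style; no functional change is intended. As a minimal illustration of the kind
of rewrite involved (this snippet is not taken from the patch):

  // Before: the star binds to the type name and the guarded statement
  // shares a line with its `if`.
  static bool contains(Loop* CurLoop, BasicBlock* BB) {
    if (!CurLoop->contains(BB)) return false;
    return true;
  }

  // After clang-format: the star binds to the variable and the guarded
  // statement drops to its own line, matching the hunks below.
  static bool contains(Loop *CurLoop, BasicBlock *BB) {
    if (!CurLoop->contains(BB))
      return false;
    return true;
  }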
Modified: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h?rev=268583&r1=268582&r2=268583&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h Wed May 4 19:54:54 2016
@@ -37,13 +37,12 @@ class TargetLibraryInfo;
/// \brief Captures loop safety information.
/// It keep information for loop & its header may throw exception.
struct LICMSafetyInfo {
- bool MayThrow; // The current loop contains an instruction which
- // may throw.
- bool HeaderMayThrow; // Same as previous, but specific to loop header
+ bool MayThrow; // The current loop contains an instruction which
+ // may throw.
+ bool HeaderMayThrow; // Same as previous, but specific to loop header
// Used to update funclet bundle operands.
DenseMap<BasicBlock *, ColorVector> BlockColors;
- LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false)
- {}
+ LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false) {}
};
/// The RecurrenceDescriptor is used to identify recurrences variables in a
@@ -268,7 +267,7 @@ public:
public:
/// Default constructor - creates an invalid induction.
InductionDescriptor()
- : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
+ : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
/// Get the consecutive direction. Returns:
/// 0 - unknown or non-consecutive.
@@ -325,8 +324,7 @@ bool simplifyLoop(Loop *L, DominatorTree
/// If ScalarEvolution is passed in, it will be preserved.
///
/// Returns true if any modifications are made to the loop.
-bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE);
+bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE);
/// \brief Put a loop nest into LCSSA form.
///
@@ -370,8 +368,8 @@ bool hoistRegion(DomTreeNode *, AliasAna
/// insertion point vector, PredIteratorCache, LoopInfo, DominatorTree, Loop,
/// AliasSet information for all instructions of the loop and loop safety
/// information as arguments. It returns changed status.
-bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock*> &,
- SmallVectorImpl<Instruction*> &,
+bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock *> &,
+ SmallVectorImpl<Instruction *> &,
PredIteratorCache &, LoopInfo *,
DominatorTree *, const TargetLibraryInfo *,
Loop *, AliasSetTracker *, LICMSafetyInfo *);
@@ -394,7 +392,7 @@ Optional<const MDOperand *> findStringMe
StringRef Name);
/// \brief Set input string into loop metadata by keeping other values intact.
-void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
+void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
unsigned V = 0);
/// Helper to consistently add the set of standard passes to a loop pass's \c
@@ -403,7 +401,6 @@ void addStringMetadataToLoop(Loop *TheLo
/// All loop passes should call this as part of implementing their \c
/// getAnalysisUsage.
void getLoopAnalysisUsage(AnalysisUsage &AU);
-
}
#endif
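
The LICMSafetyInfo struct reflowed above is filled in by computeLICMSafetyInfo
in LICM.cpp (next file). A rough sketch of the intended call pattern, assuming
a valid Loop *L; the local variable names here are hypothetical:

  LICMSafetyInfo SafetyInfo;             // MayThrow / HeaderMayThrow start false
  computeLICMSafetyInfo(&SafetyInfo, L); // scans the header, then the loop body
  if (!SafetyInfo.MayThrow) {
    // No instruction in the loop may throw, so code motion does not need to
    // account for exceptional exits from the loop.
  }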
Modified: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LICM.cpp?rev=268583&r1=268582&r2=268583&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp Wed May 4 19:54:54 2016
@@ -30,7 +30,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
@@ -59,6 +58,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -67,15 +67,15 @@ using namespace llvm;
#define DEBUG_TYPE "licm"
-STATISTIC(NumSunk , "Number of instructions sunk out of loop");
-STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
+STATISTIC(NumSunk, "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
-STATISTIC(NumPromoted , "Number of memory locations promoted to registers");
+STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
static cl::opt<bool>
-DisablePromotion("disable-licm-promotion", cl::Hidden,
- cl::desc("Disable memory promotion in LICM pass"));
+ DisablePromotion("disable-licm-promotion", cl::Hidden,
+ cl::desc("Disable memory promotion in LICM pass"));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
@@ -86,8 +86,7 @@ static bool sink(Instruction &I, const L
const Loop *CurLoop, AliasSetTracker *CurAST,
const LICMSafetyInfo *SafetyInfo);
static bool isGuaranteedToExecute(const Instruction &Inst,
- const DominatorTree *DT,
- const Loop *CurLoop,
+ const DominatorTree *DT, const Loop *CurLoop,
const LICMSafetyInfo *SafetyInfo);
static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
const DominatorTree *DT,
@@ -96,7 +95,7 @@ static bool isSafeToExecuteUnconditional
const LICMSafetyInfo *SafetyInfo,
const Instruction *CtxI = nullptr);
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo,
+ const AAMDNodes &AAInfo,
AliasSetTracker *CurAST);
static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
@@ -108,57 +107,57 @@ static bool canSinkOrHoistInst(Instructi
LICMSafetyInfo *SafetyInfo);
namespace {
- struct LICM : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
- LICM() : LoopPass(ID) {
- initializeLICMPass(*PassRegistry::getPassRegistry());
- }
+struct LICM : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LICM() : LoopPass(ID) {
+ initializeLICMPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getLoopAnalysisUsage(AU);
+ }
- using llvm::Pass::doFinalization;
+ using llvm::Pass::doFinalization;
- bool doFinalization() override {
- assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
- return false;
- }
+ bool doFinalization() override {
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
+ return false;
+ }
- private:
- AliasAnalysis *AA; // Current AliasAnalysis information
- LoopInfo *LI; // Current LoopInfo
- DominatorTree *DT; // Dominator Tree for the current Loop.
-
- TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
-
- // State that is updated as we process loops.
- bool Changed; // Set to true when we change anything.
- BasicBlock *Preheader; // The preheader block of the current loop...
- Loop *CurLoop; // The current loop we are working on...
- AliasSetTracker *CurAST; // AliasSet information for the current loop...
- DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
-
- /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
- void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
- Loop *L) override;
-
- /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
- /// set.
- void deleteAnalysisValue(Value *V, Loop *L) override;
+private:
+ AliasAnalysis *AA; // Current AliasAnalysis information
+ LoopInfo *LI; // Current LoopInfo
+ DominatorTree *DT; // Dominator Tree for the current Loop.
+
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+
+ // State that is updated as we process loops.
+ bool Changed; // Set to true when we change anything.
+ BasicBlock *Preheader; // The preheader block of the current loop...
+ Loop *CurLoop; // The current loop we are working on...
+ AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap;
+
+ /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
+ Loop *L) override;
+
+ /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+ /// set.
+ void deleteAnalysisValue(Value *V, Loop *L) override;
- /// Simple Analysis hook. Delete loop L from alias set map.
- void deleteAnalysisLoop(Loop *L) override;
+ /// Simple Analysis hook. Delete loop L from alias set map.
+ void deleteAnalysisLoop(Loop *L) override;
- AliasSetTracker *collectAliasInfoForLoop(Loop *L);
- };
+ AliasSetTracker *collectAliasInfoForLoop(Loop *L);
+};
}
char LICM::ID = 0;
@@ -225,9 +224,9 @@ bool LICM::runOnLoop(Loop *L, LPPassMana
// Loop over all of the alias sets in the tracker object.
for (AliasSet &AS : *CurAST)
- Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts,
- PIC, LI, DT, TLI, CurLoop,
- CurAST, &SafetyInfo);
+ Changed |=
+ promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT,
+ TLI, CurLoop, CurAST, &SafetyInfo);
// Once we have promoted values across the loop body we have to recursively
// reform LCSSA as any nested loop may now have values defined within the
@@ -266,7 +265,7 @@ bool LICM::runOnLoop(Loop *L, LPPassMana
}
/// Walk the specified region of the CFG (defined by all blocks dominated by
-/// the specified block, and that are in the current loop) in reverse depth
+/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
@@ -275,25 +274,27 @@ bool llvm::sinkRegion(DomTreeNode *N, Al
AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
// Verify inputs.
- assert(N != nullptr && AA != nullptr && LI != nullptr &&
- DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
- SafetyInfo != nullptr && "Unexpected input to sinkRegion");
+ assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
+ CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
+ "Unexpected input to sinkRegion");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return false;
+ if (!CurLoop->contains(BB))
+ return false;
// We are processing blocks in reverse dfo, so process children first.
bool Changed = false;
- const std::vector<DomTreeNode*> &Children = N->getChildren();
+ const std::vector<DomTreeNode *> &Children = N->getChildren();
for (DomTreeNode *Child : Children)
Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
- if (inSubLoop(BB,CurLoop,LI)) return Changed;
+ if (inSubLoop(BB, CurLoop, LI))
+ return Changed;
- for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
+ for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
Instruction &I = *--II;
// If the instruction is dead, we would try to sink it because it isn't used
@@ -330,20 +331,21 @@ bool llvm::hoistRegion(DomTreeNode *N, A
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
// Verify inputs.
- assert(N != nullptr && AA != nullptr && LI != nullptr &&
- DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
- SafetyInfo != nullptr && "Unexpected input to hoistRegion");
+ assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
+ CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
+ "Unexpected input to hoistRegion");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return false;
+ if (!CurLoop->contains(BB))
+ return false;
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
bool Changed = false;
if (!inSubLoop(BB, CurLoop, LI))
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
Instruction &I = *II++;
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
@@ -364,12 +366,13 @@ bool llvm::hoistRegion(DomTreeNode *N, A
//
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) &&
- isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
- CurLoop->getLoopPreheader()->getTerminator()))
+ isSafeToExecuteUnconditionally(
+ I, DT, TLI, CurLoop, SafetyInfo,
+ CurLoop->getLoopPreheader()->getTerminator()))
Changed |= hoist(I, DT, CurLoop, SafetyInfo);
}
- const std::vector<DomTreeNode*> &Children = N->getChildren();
+ const std::vector<DomTreeNode *> &Children = N->getChildren();
for (DomTreeNode *Child : Children)
Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
return Changed;
@@ -378,7 +381,7 @@ bool llvm::hoistRegion(DomTreeNode *N, A
/// Computes loop safety information, checks loop body & header
/// for the possibility of may throw exception.
///
-void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
+void llvm::computeLICMSafetyInfo(LICMSafetyInfo *SafetyInfo, Loop *CurLoop) {
assert(CurLoop != nullptr && "CurLoop cant be null");
BasicBlock *Header = CurLoop->getHeader();
// Setting default safety values.
@@ -388,11 +391,12 @@ void llvm::computeLICMSafetyInfo(LICMSaf
for (BasicBlock::iterator I = Header->begin(), E = Header->end();
(I != E) && !SafetyInfo->HeaderMayThrow; ++I)
SafetyInfo->HeaderMayThrow |= I->mayThrow();
-
+
SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
- // Iterate over loop instructions and compute safety info.
- for (Loop::block_iterator BB = CurLoop->block_begin(),
- BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow ; ++BB)
+ // Iterate over loop instructions and compute safety info.
+ for (Loop::block_iterator BB = CurLoop->block_begin(),
+ BBE = CurLoop->block_end();
+ (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
(I != E) && !SafetyInfo->MayThrow; ++I)
SafetyInfo->MayThrow |= I->mayThrow();
@@ -415,7 +419,7 @@ bool canSinkOrHoistInst(Instruction &I,
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
- return false; // Don't hoist volatile/atomic loads!
+ return false; // Don't hoist volatile/atomic loads!
// Loads from constant memory are always safe to move, even if they end up
// in the same alias set as something that ends up being modified.
@@ -467,7 +471,8 @@ bool canSinkOrHoistInst(Instruction &I,
break;
}
}
- if (!FoundMod) return true;
+ if (!FoundMod)
+ return true;
}
// FIXME: This should use mod/ref information to see if we can hoist or
@@ -486,7 +491,7 @@ bool canSinkOrHoistInst(Instruction &I,
// TODO: Plumb the context instruction through to make hoisting and sinking
// more powerful. Hoisting of loads already works due to the special casing
- // above.
+ // above.
return isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
nullptr);
}
@@ -589,7 +594,8 @@ CloneInstructionInExitBlock(Instruction
}
ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
- if (!I.getName().empty()) New->setName(I.getName() + ".le");
+ if (!I.getName().empty())
+ New->setName(I.getName() + ".le");
// Build LCSSA PHI nodes for any in-loop operands. Note that this is
// particularly cheap because we can rip off the PHI node that we're
@@ -623,15 +629,17 @@ static bool sink(Instruction &I, const L
const LICMSafetyInfo *SafetyInfo) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
bool Changed = false;
- if (isa<LoadInst>(I)) ++NumMovedLoads;
- else if (isa<CallInst>(I)) ++NumMovedCalls;
+ if (isa<LoadInst>(I))
+ ++NumMovedLoads;
+ else if (isa<CallInst>(I))
+ ++NumMovedCalls;
++NumSunk;
Changed = true;
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
- SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
+ SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
#endif
@@ -688,8 +696,8 @@ static bool sink(Instruction &I, const L
static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
const LICMSafetyInfo *SafetyInfo) {
auto *Preheader = CurLoop->getLoopPreheader();
- DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
- << I << "\n");
+ DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
+ << "\n");
// Metadata can be dependent on conditions we are hoisting above.
// Conservatively strip all metadata on the instruction unless we were
@@ -705,8 +713,10 @@ static bool hoist(Instruction &I, const
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
- if (isa<LoadInst>(I)) ++NumMovedLoads;
- else if (isa<CallInst>(I)) ++NumMovedCalls;
+ if (isa<LoadInst>(I))
+ ++NumMovedLoads;
+ else if (isa<CallInst>(I))
+ ++NumMovedCalls;
++NumHoisted;
return true;
}
@@ -714,7 +724,7 @@ static bool hoist(Instruction &I, const
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader.
/// or if it is a trapping instruction and is guaranteed to execute.
-static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
+static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
const DominatorTree *DT,
const TargetLibraryInfo *TLI,
const Loop *CurLoop,
@@ -727,9 +737,8 @@ static bool isSafeToExecuteUnconditional
}
static bool isGuaranteedToExecute(const Instruction &Inst,
- const DominatorTree *DT,
- const Loop *CurLoop,
- const LICMSafetyInfo * SafetyInfo) {
+ const DominatorTree *DT, const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo) {
// We have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
@@ -749,7 +758,7 @@ static bool isGuaranteedToExecute(const
return false;
// Get the exit blocks for the current loop.
- SmallVector<BasicBlock*, 8> ExitBlocks;
+ SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
// Verify that the block dominates each of the exit blocks of the loop.
@@ -766,82 +775,79 @@ static bool isGuaranteedToExecute(const
}
namespace {
- class LoopPromoter : public LoadAndStorePromoter {
- Value *SomePtr; // Designated pointer to store to.
- SmallPtrSetImpl<Value*> &PointerMustAliases;
- SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
- SmallVectorImpl<Instruction*> &LoopInsertPts;
- PredIteratorCache &PredCache;
- AliasSetTracker &AST;
- LoopInfo &LI;
- DebugLoc DL;
- int Alignment;
- AAMDNodes AATags;
-
- Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (Loop *L = LI.getLoopFor(I->getParent()))
- if (!L->contains(BB)) {
- // We need to create an LCSSA PHI node for the incoming value and
- // store that.
- PHINode *PN =
- PHINode::Create(I->getType(), PredCache.size(BB),
- I->getName() + ".lcssa", &BB->front());
- for (BasicBlock *Pred : PredCache.get(BB))
- PN->addIncoming(I, Pred);
- return PN;
- }
- return V;
- }
+class LoopPromoter : public LoadAndStorePromoter {
+ Value *SomePtr; // Designated pointer to store to.
+ SmallPtrSetImpl<Value *> &PointerMustAliases;
+ SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
+ SmallVectorImpl<Instruction *> &LoopInsertPts;
+ PredIteratorCache &PredCache;
+ AliasSetTracker &AST;
+ LoopInfo &LI;
+ DebugLoc DL;
+ int Alignment;
+ AAMDNodes AATags;
- public:
- LoopPromoter(Value *SP,
- ArrayRef<const Instruction *> Insts,
- SSAUpdater &S, SmallPtrSetImpl<Value *> &PMA,
- SmallVectorImpl<BasicBlock *> &LEB,
- SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
- AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- const AAMDNodes &AATags)
- : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
- LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
- LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
-
- bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &) const override {
- Value *Ptr;
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- Ptr = LI->getOperand(0);
- else
- Ptr = cast<StoreInst>(I)->getPointerOperand();
- return PointerMustAliases.count(Ptr);
- }
+ Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Loop *L = LI.getLoopFor(I->getParent()))
+ if (!L->contains(BB)) {
+ // We need to create an LCSSA PHI node for the incoming value and
+ // store that.
+ PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
+ I->getName() + ".lcssa", &BB->front());
+ for (BasicBlock *Pred : PredCache.get(BB))
+ PN->addIncoming(I, Pred);
+ return PN;
+ }
+ return V;
+ }
- void doExtraRewritesBeforeFinalDeletion() const override {
- // Insert stores after in the loop exit blocks. Each exit block gets a
- // store of the live-out values that feed them. Since we've already told
- // the SSA updater about the defs in the loop and the preheader
- // definition, it is all set and we can start using it.
- for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = LoopExitBlocks[i];
- Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
- Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
- Instruction *InsertPos = LoopInsertPts[i];
- StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
- NewSI->setAlignment(Alignment);
- NewSI->setDebugLoc(DL);
- if (AATags) NewSI->setAAMetadata(AATags);
- }
- }
+public:
+ LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
+ SmallPtrSetImpl<Value *> &PMA,
+ SmallVectorImpl<BasicBlock *> &LEB,
+ SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
+ AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
+ const AAMDNodes &AATags)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+ LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
+ LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
+
+ bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction *> &) const override {
+ Value *Ptr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+ return PointerMustAliases.count(Ptr);
+ }
- void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
- // Update alias analysis.
- AST.copyValue(LI, V);
+ void doExtraRewritesBeforeFinalDeletion() const override {
+ // Insert stores after in the loop exit blocks. Each exit block gets a
+ // store of the live-out values that feed them. Since we've already told
+ // the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = LoopExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
+ Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
+ Instruction *InsertPos = LoopInsertPts[i];
+ StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+ NewSI->setAlignment(Alignment);
+ NewSI->setDebugLoc(DL);
+ if (AATags)
+ NewSI->setAAMetadata(AATags);
}
- void instructionDeleted(Instruction *I) const override {
- AST.deleteValue(I);
- }
- };
+ }
+
+ void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
+ // Update alias analysis.
+ AST.copyValue(LI, V);
+ }
+ void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); }
+};
} // end anon namespace
/// Try to promote memory values to scalars by sinking stores out of the
@@ -849,19 +855,14 @@ namespace {
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant.
///
-bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
- SmallVectorImpl<BasicBlock*>&ExitBlocks,
- SmallVectorImpl<Instruction*>&InsertPts,
- PredIteratorCache &PIC, LoopInfo *LI,
- DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- Loop *CurLoop,
- AliasSetTracker *CurAST,
- LICMSafetyInfo * SafetyInfo) {
+bool llvm::promoteLoopAccessesToScalars(
+ AliasSet &AS, SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
+ LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
+ Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
// Verify inputs.
- assert(LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && CurAST != nullptr &&
- SafetyInfo != nullptr &&
+ assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
+ CurAST != nullptr && SafetyInfo != nullptr &&
"Unexpected Input to promoteLoopAccessesToScalars");
// We can promote this alias set if it has a store, if it is a "Must" alias
@@ -875,7 +876,7 @@ bool llvm::promoteLoopAccessesToScalars(
"Must alias set should have at least one pointer element in it!");
Value *SomePtr = AS.begin()->getValue();
- BasicBlock * Preheader = CurLoop->getLoopPreheader();
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
// It isn't safe to promote a load/store from the loop if the load/store is
// conditional. For example, turning:
@@ -907,8 +908,8 @@ bool llvm::promoteLoopAccessesToScalars(
// since they're all must alias.
bool CanSpeculateLoad = false;
- SmallVector<Instruction*, 64> LoopUses;
- SmallPtrSet<Value*, 4> PointerMustAliases;
+ SmallVector<Instruction *, 64> LoopUses;
+ SmallPtrSet<Value *, 4> PointerMustAliases;
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
@@ -923,7 +924,7 @@ bool llvm::promoteLoopAccessesToScalars(
// is available.
if (!HasDedicatedExits || !Preheader)
return false;
-
+
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
// Check that all of the pointers in the alias set have the same type. We
@@ -954,10 +955,8 @@ bool llvm::promoteLoopAccessesToScalars(
return Changed;
if (!GuaranteedToExecute && !CanSpeculateLoad)
- CanSpeculateLoad =
- isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
- SafetyInfo,
- Preheader->getTerminator());
+ CanSpeculateLoad = isSafeToExecuteUnconditionally(
+ *Load, DT, TLI, CurLoop, SafetyInfo, Preheader->getTerminator());
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
@@ -983,16 +982,13 @@ bool llvm::promoteLoopAccessesToScalars(
}
if (!GuaranteedToExecute)
- GuaranteedToExecute = isGuaranteedToExecute(*UI, DT,
- CurLoop, SafetyInfo);
-
+ GuaranteedToExecute =
+ isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo);
if (!GuaranteedToExecute && !CanSpeculateLoad) {
- CanSpeculateLoad =
- isDereferenceableAndAlignedPointer(Store->getPointerOperand(),
- Store->getAlignment(), MDL,
- Preheader->getTerminator(),
- DT, TLI);
+ CanSpeculateLoad = isDereferenceableAndAlignedPointer(
+ Store->getPointerOperand(), Store->getAlignment(), MDL,
+ Preheader->getTerminator(), DT, TLI);
}
} else
return Changed; // Not a load or store.
@@ -1014,10 +1010,10 @@ bool llvm::promoteLoopAccessesToScalars(
if (!PromotionIsLegal && CanSpeculateLoad) {
// If this is a thread local location, then we can insert stores along
// paths which originally didn't have them without violating the memory
- // model.
+ // model.
Value *Object = GetUnderlyingObject(SomePtr, MDL);
- PromotionIsLegal = isAllocLikeFn(Object, TLI) &&
- !PointerMayBeCaptured(Object, true, true);
+ PromotionIsLegal =
+ isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true);
}
if (!PromotionIsLegal)
return Changed;
@@ -1038,7 +1034,8 @@ bool llvm::promoteLoopAccessesToScalars(
return Changed;
// Otherwise, this is safe to promote, lets do it!
- DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
+ << '\n');
Changed = true;
++NumPromoted;
@@ -1049,20 +1046,19 @@ bool llvm::promoteLoopAccessesToScalars(
DebugLoc DL = LoopUses[0]->getDebugLoc();
// We use the SSAUpdater interface to insert phi nodes as required.
- SmallVector<PHINode*, 16> NewPHIs;
+ SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
- LoopPromoter Promoter(SomePtr, LoopUses, SSA,
- PointerMustAliases, ExitBlocks,
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
- LoadInst *PreheaderLoad =
- new LoadInst(SomePtr, SomePtr->getName()+".promoted",
- Preheader->getTerminator());
+ LoadInst *PreheaderLoad = new LoadInst(
+ SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
- if (AATags) PreheaderLoad->setAAMetadata(AATags);
+ if (AATags)
+ PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
// Rewrite all the loads in the loop and remember all the definitions from
@@ -1157,12 +1153,11 @@ void LICM::deleteAnalysisLoop(Loop *L) {
LoopToAliasSetMap.erase(L);
}
-
/// Return true if the body of this loop may store into the memory
/// location pointed to by V.
///
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo,
+ const AAMDNodes &AAInfo,
AliasSetTracker *CurAST) {
// Check to see if any of the basic blocks in CurLoop invalidate *V.
return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
@@ -1175,4 +1170,3 @@ static bool inSubLoop(BasicBlock *BB, Lo
assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
return LI->getLoopFor(BB) != CurLoop;
}
-
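
The promoteLoopAccessesToScalars hunks above are formatting-only; as a reminder
of the transformation itself (a sketch, not code from this patch): for a
loop-invariant pointer in a must-alias set with a store, the pass loads once in
the preheader (the "<name>.promoted" PreheaderLoad above), keeps the running
value in a scalar via SSAUpdater, and has LoopPromoter rebuild the stores in
the loop exit blocks. Names like tmp and n below are hypothetical:

  // Before promotion: every iteration loads and stores through SomePtr.
  for (int i = 0; i != n; ++i)
    *SomePtr += 1;

  // After promotion: one load in the preheader, scalar updates in the loop,
  // and a store re-inserted per exit block (doExtraRewritesBeforeFinalDeletion).
  int tmp = *SomePtr;
  for (int i = 0; i != n; ++i)
    tmp += 1;
  *SomePtr = tmp;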
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=268583&r1=268582&r2=268583&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Wed May 4 19:54:54 2016
@@ -12,11 +12,10 @@
// counts of loops easily.
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
@@ -32,6 +31,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <climits>
@@ -60,34 +60,34 @@ static cl::opt<unsigned> UnrollMaxIterat
cl::desc("Don't allow loop unrolling to simulate more than this number of"
"iterations when checking full unroll profitability"));
-static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::Hidden,
- cl::desc("Use this unroll count for all loops including those with "
- "unroll_count pragma values, for testing purposes"));
-
-static cl::opt<unsigned>
-UnrollMaxCount("unroll-max-count", cl::Hidden,
- cl::desc("Set the max unroll count for partial and runtime unrolling, for"
- "testing purposes"));
-
-static cl::opt<unsigned>
-UnrollFullMaxCount("unroll-full-max-count", cl::Hidden,
- cl::desc("Set the max unroll count for full unrolling, for testing purposes"));
+static cl::opt<unsigned> UnrollCount(
+ "unroll-count", cl::Hidden,
+ cl::desc("Use this unroll count for all loops including those with "
+ "unroll_count pragma values, for testing purposes"));
+
+static cl::opt<unsigned> UnrollMaxCount(
+ "unroll-max-count", cl::Hidden,
+ cl::desc("Set the max unroll count for partial and runtime unrolling, for"
+ "testing purposes"));
+
+static cl::opt<unsigned> UnrollFullMaxCount(
+ "unroll-full-max-count", cl::Hidden,
+ cl::desc(
+ "Set the max unroll count for full unrolling, for testing purposes"));
static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
- cl::desc("Allows loops to be partially unrolled until "
- "-unroll-threshold loop size is reached."));
+ UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
+ cl::desc("Allows loops to be partially unrolled until "
+ "-unroll-threshold loop size is reached."));
static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
- cl::desc("Unroll loops with run-time trip counts"));
-
-static cl::opt<unsigned>
-PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
- cl::desc("Unrolled size limit for loops with an unroll(full) or "
- "unroll_count pragma."));
+ UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
+static cl::opt<unsigned> PragmaUnrollThreshold(
+ "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
+ cl::desc("Unrolled size limit for loops with an unroll(full) or "
+ "unroll_count pragma."));
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
@@ -453,7 +453,8 @@ static unsigned UnrollCountPragmaValue(c
// unrolling pass is run more than once (which it generally is).
static void SetLoopAlreadyUnrolled(Loop *L) {
MDNode *LoopID = L->getLoopID();
- if (!LoopID) return;
+ if (!LoopID)
+ return;
// First remove any existing loop unrolling metadata.
SmallVector<Metadata *, 4> MDs;
@@ -514,9 +515,9 @@ static bool canUnrollCompletely(Loop *L,
(int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
(int64_t)Threshold) {
DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the "
- "expected dynamic cost by " << PercentDynamicCostSaved
- << "% (threshold: " << PercentDynamicCostSavedThreshold
- << "%)\n"
+ "expected dynamic cost by "
+ << PercentDynamicCostSaved << "% (threshold: "
+ << PercentDynamicCostSavedThreshold << "%)\n"
<< " and the unrolled cost (" << UnrolledCost
<< ") is less than the max threshold ("
<< DynamicCostSavingsDiscount << ").\n");
@@ -544,7 +545,7 @@ static bool tryToUnrollLoop(Loop *L, Dom
Optional<bool> ProvidedRuntime) {
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
- << "] Loop %" << Header->getName() << "\n");
+ << "] Loop %" << Header->getName() << "\n");
if (HasUnrollDisablePragma(L)) {
return false;
@@ -592,7 +593,7 @@ static bool tryToUnrollLoop(Loop *L, Dom
// When computing the unrolled size, note that the conditional branch on the
// backedge and the comparison feeding it are not replicated like the rest of
// the loop body (which is why 2 is subtracted).
- uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2;
+ uint64_t UnrolledSize = (uint64_t)(LoopSize - 2) * Count + 2;
if (NotDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
<< " instructions.\n");
@@ -680,7 +681,7 @@ static bool tryToUnrollLoop(Loop *L, Dom
// the original count which satisfies the threshold limit.
while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
Count >>= 1;
- UnrolledSize = (LoopSize-2) * Count + 2;
+ UnrolledSize = (LoopSize - 2) * Count + 2;
}
if (Count > UP.MaxCount)
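
One arithmetic note on the UnrolledSize hunks above: the backedge compare and
branch are not replicated with the rest of the body, hence the subtract-2 /
add-2 shape of the formula. A worked instance with made-up numbers:

  uint64_t LoopSize = 10, Count = 4;
  uint64_t UnrolledSize = (LoopSize - 2) * Count + 2; // 8 * 4 + 2 == 34

  // The halving loop then shrinks Count until the estimate fits the partial
  // threshold; e.g. with a threshold of 20: Count 4 -> 34, Count 2 -> 18, stop.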
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=268583&r1=268582&r2=268583&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed May 4 19:54:54 2016
@@ -46,7 +46,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
@@ -57,9 +56,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
@@ -73,6 +72,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -98,9 +98,9 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopVersioning.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <functional>
#include <map>
@@ -116,16 +116,15 @@ STATISTIC(LoopsVectorized, "Number of lo
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
static cl::opt<bool>
-EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
- cl::desc("Enable if-conversion during vectorization."));
+ EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
+ cl::desc("Enable if-conversion during vectorization."));
/// We don't vectorize loops with a known constant trip count below this number.
-static cl::opt<unsigned>
-TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
- cl::Hidden,
- cl::desc("Don't vectorize loops with a constant "
- "trip count that is smaller than this "
- "value."));
+static cl::opt<unsigned> TinyTripCountVectorThreshold(
+ "vectorizer-min-trip-count", cl::init(16), cl::Hidden,
+ cl::desc("Don't vectorize loops with a constant "
+ "trip count that is smaller than this "
+ "value."));
static cl::opt<bool> MaximizeBandwidth(
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
@@ -263,7 +262,7 @@ public:
/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
-static Type* ToVectorTy(Type *Scalar, unsigned VF) {
+static Type *ToVectorTy(Type *Scalar, unsigned VF) {
if (Scalar->isVoidTy() || VF == 1)
return Scalar;
return VectorType::get(Scalar, VF);
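
(ToVectorTy above is the usual VF-widening helper: with VF = 4 it maps i32 to
<4 x i32>, while VF = 1 and void pass through unchanged. A tiny usage sketch,
assuming an IRBuilder<> named Builder, which is hypothetical here:

  Type *V = ToVectorTy(Builder.getInt32Ty(), 4); // <4 x i32>
  Type *S = ToVectorTy(Builder.getInt32Ty(), 1); // i32, returned as-is
)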
@@ -327,7 +326,7 @@ public:
// can be validly truncated to. The cost model has assumed this truncation
// will happen when vectorizing.
void vectorize(LoopVectorizationLegality *L,
- MapVector<Instruction*,uint64_t> MinimumBitWidths) {
+ MapVector<Instruction *, uint64_t> MinimumBitWidths) {
MinBWs = MinimumBitWidths;
Legal = L;
// Create a new empty loop. Unlink the old loop and connect the new one.
@@ -338,24 +337,22 @@ public:
}
// Return true if any runtime check is added.
- bool IsSafetyChecksAdded() {
- return AddedSafetyChecks;
- }
+ bool IsSafetyChecksAdded() { return AddedSafetyChecks; }
virtual ~InnerLoopVectorizer() {}
protected:
/// A small list of PHINodes.
- typedef SmallVector<PHINode*, 4> PhiVector;
+ typedef SmallVector<PHINode *, 4> PhiVector;
/// When we unroll loops we have multiple vector values for each scalar.
/// This data structure holds the unrolled and vectorized values that
/// originated from one scalar instruction.
- typedef SmallVector<Value*, 2> VectorParts;
+ typedef SmallVector<Value *, 2> VectorParts;
// When we if-convert we need to create edge masks. We have to cache values
// so that we don't end up with exponential recursion/IR.
- typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
- VectorParts> EdgeMaskCache;
+ typedef DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts>
+ EdgeMaskCache;
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop();
@@ -392,8 +389,8 @@ protected:
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
- void widenPHIInstruction(Instruction *PN, VectorParts &Entry,
- unsigned UF, unsigned VF, PhiVector *PV);
+ void widenPHIInstruction(Instruction *PN, VectorParts &Entry, unsigned UF,
+ unsigned VF, PhiVector *PV);
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
@@ -404,7 +401,7 @@ protected:
/// scalarized instruction behind an if block predicated on the control
/// dependence of the instruction.
virtual void scalarizeInstruction(Instruction *Instr,
- bool IfPredicateStore=false);
+ bool IfPredicateStore = false);
/// Vectorize Load and Store instructions,
virtual void vectorizeMemoryInstruction(Instruction *Instr);
@@ -554,11 +551,11 @@ protected:
BasicBlock *LoopScalarPreHeader;
/// Middle Block between the vector and the scalar.
BasicBlock *LoopMiddleBlock;
- ///The ExitBlock of the scalar loop.
+ /// The ExitBlock of the scalar loop.
BasicBlock *LoopExitBlock;
- ///The vector loop body.
+ /// The vector loop body.
SmallVector<BasicBlock *, 4> LoopVectorBody;
- ///The scalar loop body.
+ /// The scalar loop body.
BasicBlock *LoopScalarBody;
/// A list of all bypass blocks. The first block is the entry of the loop.
SmallVector<BasicBlock *, 4> LoopBypassBlocks;
@@ -571,7 +568,7 @@ protected:
ValueMap WidenMap;
/// Store instructions that should be predicated, as a pair
/// <StoreInst, Predicate>
- SmallVector<std::pair<StoreInst*,Value*>, 4> PredicatedStores;
+ SmallVector<std::pair<StoreInst *, Value *>, 4> PredicatedStores;
EdgeMaskCache MaskCache;
/// Trip count of the original loop.
Value *TripCount;
@@ -581,7 +578,7 @@ protected:
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
- MapVector<Instruction*,uint64_t> MinBWs;
+ MapVector<Instruction *, uint64_t> MinBWs;
LoopVectorizationLegality *Legal;
// Record whether runtime check is added.
@@ -665,10 +662,8 @@ static void propagateMetadata(Instructio
// on the condition, and thus actually aliased with some other
// non-speculated memory access when the condition was false, this would be
// caught by the runtime overlap checks).
- if (Kind != LLVMContext::MD_tbaa &&
- Kind != LLVMContext::MD_alias_scope &&
- Kind != LLVMContext::MD_noalias &&
- Kind != LLVMContext::MD_fpmath &&
+ if (Kind != LLVMContext::MD_tbaa && Kind != LLVMContext::MD_alias_scope &&
+ Kind != LLVMContext::MD_noalias && Kind != LLVMContext::MD_fpmath &&
Kind != LLVMContext::MD_nontemporal)
continue;
@@ -934,20 +929,16 @@ private:
/// for example 'force', means a decision has been made. So, we need to be
/// careful NOT to add them if the user hasn't specifically asked so.
class LoopVectorizeHints {
- enum HintKind {
- HK_WIDTH,
- HK_UNROLL,
- HK_FORCE
- };
+ enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE };
/// Hint - associates name and validation with the hint value.
struct Hint {
- const char * Name;
+ const char *Name;
unsigned Value; // This may have to change for non-numeric values.
HintKind Kind;
- Hint(const char * Name, unsigned Value, HintKind Kind)
- : Name(Name), Value(Value), Kind(Kind) { }
+ Hint(const char *Name, unsigned Value, HintKind Kind)
+ : Name(Name), Value(Value), Kind(Kind) {}
bool validate(unsigned Val) {
switch (Kind) {
@@ -1094,9 +1085,7 @@ public:
return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
}
- void setPotentiallyUnsafe() {
- PotentiallyUnsafe = true;
- }
+ void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
private:
/// Find hints specified in the loop metadata and update local values.
@@ -1143,7 +1132,8 @@ private:
Name = Name.substr(Prefix().size(), StringRef::npos);
const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
- if (!C) return;
+ if (!C)
+ return;
unsigned Val = C->getZExtValue();
Hint *Hints[] = {&Width, &Interleave, &Force};
@@ -1169,7 +1159,7 @@ private:
/// Matches metadata with hint name.
bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
- MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
+ MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
if (!Name)
return false;
@@ -1271,7 +1261,7 @@ public:
/// InductionList saves induction variables and maps them to the
/// induction descriptor.
- typedef MapVector<PHINode*, InductionDescriptor> InductionList;
+ typedef MapVector<PHINode *, InductionDescriptor> InductionList;
/// RecurrenceSet contains the phi nodes that are recurrences other than
/// inductions and reductions.
@@ -1324,16 +1314,14 @@ public:
bool isUniform(Value *V);
/// Returns true if this instruction will remain scalar after vectorization.
- bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
+ bool isUniformAfterVectorization(Instruction *I) { return Uniforms.count(I); }
/// Returns the information that we collected about runtime memory check.
const RuntimePointerChecking *getRuntimePointerChecking() const {
return LAI->getRuntimePointerChecking();
}
- const LoopAccessInfo *getLAI() const {
- return LAI;
- }
+ const LoopAccessInfo *getLAI() const { return LAI; }
/// \brief Check if \p Instr belongs to any interleaved access group.
bool isAccessInterleaved(Instruction *Instr) {
@@ -1383,18 +1371,11 @@ public:
/// Returns true if vector representation of the instruction \p I
/// requires mask.
- bool isMaskRequired(const Instruction* I) {
- return (MaskedOp.count(I) != 0);
- }
- unsigned getNumStores() const {
- return LAI->getNumStores();
- }
- unsigned getNumLoads() const {
- return LAI->getNumLoads();
- }
- unsigned getNumPredStores() const {
- return NumPredStores;
- }
+ bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
+ unsigned getNumStores() const { return LAI->getNumStores(); }
+ unsigned getNumLoads() const { return LAI->getNumLoads(); }
+ unsigned getNumPredStores() const { return NumPredStores; }
+
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -1479,10 +1460,10 @@ private:
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
- SmallPtrSet<Value*, 4> AllowedExit;
+ SmallPtrSet<Value *, 4> AllowedExit;
/// This set holds the variables which are known to be uniform after
/// vectorization.
- SmallPtrSet<Instruction*, 4> Uniforms;
+ SmallPtrSet<Instruction *, 4> Uniforms;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
@@ -1513,9 +1494,9 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI,
- DemandedBits *DB, AssumptionCache *AC,
- const Function *F, const LoopVectorizeHints *Hints,
+ const TargetLibraryInfo *TLI, DemandedBits *DB,
+ AssumptionCache *AC, const Function *F,
+ const LoopVectorizeHints *Hints,
SmallPtrSetImpl<const Value *> &ValuesToIgnore)
: TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {}
@@ -1523,7 +1504,7 @@ public:
/// Information about vectorization costs
struct VectorizationFactor {
unsigned Width; // Vector width with best cost
- unsigned Cost; // Cost of the loop with that width
+ unsigned Cost; // Cost of the loop with that width
};
/// \return The most profitable vectorization factor and the cost of that VF.
/// This method checks every power of two up to VF. If UserVF is not ZERO
@@ -1567,8 +1548,10 @@ public:
private:
/// The vectorization cost is a combination of the cost itself and a boolean
- /// indicating whether any of the contributing operations will actually operate on
- /// vector values after type legalization in the backend. If this latter value is
+ /// indicating whether any of the contributing operations will actually
+ /// operate on
+ /// vector values after type legalization in the backend. If this latter value
+ /// is
/// false, then all operations will be scalarized (i.e. no vectorization has
/// actually taken place).
typedef std::pair<unsigned, bool> VectorizationCostTy;
@@ -1603,7 +1586,7 @@ public:
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
- MapVector<Instruction*,uint64_t> MinBWs;
+ MapVector<Instruction *, uint64_t> MinBWs;
/// The loop that we evaluate.
Loop *TheLoop;
@@ -1699,9 +1682,8 @@ struct LoopVectorize : public FunctionPa
static char ID;
explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
- : FunctionPass(ID),
- DisableUnrolling(NoUnrolling),
- AlwaysVectorize(AlwaysVectorize) {
+ : FunctionPass(ID), DisableUnrolling(NoUnrolling),
+ AlwaysVectorize(AlwaysVectorize) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -1823,7 +1805,8 @@ struct LoopVectorize : public FunctionPa
? "disabled"
: (Hints.getForce() == LoopVectorizeHints::FK_Enabled
? "enabled"
- : "?")) << " width=" << Hints.getWidth()
+ : "?"))
+ << " width=" << Hints.getWidth()
<< " unroll=" << Hints.getInterleave() << "\n");
// Function containing loop
@@ -1887,8 +1870,8 @@ struct LoopVectorize : public FunctionPa
// Check the function attributes to find out if this function should be
// optimized for size.
- bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- F->optForSize();
+ bool OptForSize =
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
// Compute the weighted frequency of this loop being executed and see if it
// is less than 20% of the function entry baseline frequency. Note that we
@@ -1908,7 +1891,7 @@ struct LoopVectorize : public FunctionPa
// vector instructions?
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
- "attribute is used.\n");
+ "attribute is used.\n");
emitAnalysisDiag(
F, L, Hints,
VectorizationReport()
@@ -1924,10 +1907,9 @@ struct LoopVectorize : public FunctionPa
if (Hints.isPotentiallyUnsafe() &&
TTI->isFPVectorizationPotentiallyUnsafe()) {
DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
- emitAnalysisDiag(
- F, L, Hints,
- VectorizationReport()
- << "loop not vectorized due to unsafe FP support.");
+ emitAnalysisDiag(F, L, Hints,
+ VectorizationReport()
+ << "loop not vectorized due to unsafe FP support.");
emitMissedWarning(F, L, Hints);
return false;
}
@@ -2058,7 +2040,6 @@ struct LoopVectorize : public FunctionPa
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
-
};
} // end anonymous namespace
@@ -2071,9 +2052,9 @@ struct LoopVectorize : public FunctionPa
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
// We need to place the broadcast of invariant variables outside the loop.
Instruction *Instr = dyn_cast<Instruction>(V);
- bool NewInstr =
- (Instr && std::find(LoopVectorBody.begin(), LoopVectorBody.end(),
- Instr->getParent()) != LoopVectorBody.end());
+ bool NewInstr = (Instr &&
+ std::find(LoopVectorBody.begin(), LoopVectorBody.end(),
+ Instr->getParent()) != LoopVectorBody.end());
bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
// Place the code for broadcasting invariant variables in the new preheader.
@@ -2098,7 +2079,7 @@ Value *InnerLoopVectorizer::getStepVecto
Type *ITy = Val->getType()->getScalarType();
VectorType *Ty = cast<VectorType>(Val->getType());
int VLen = Ty->getNumElements();
- SmallVector<Constant*, 8> Indices;
+ SmallVector<Constant *, 8> Indices;
// Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i)
@@ -2204,7 +2185,7 @@ bool LoopVectorizationLegality::isUnifor
return LAI->isUniform(V);
}
-InnerLoopVectorizer::VectorParts&
+InnerLoopVectorizer::VectorParts &
InnerLoopVectorizer::getVectorValue(Value *V) {
assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
@@ -2225,7 +2206,7 @@ InnerLoopVectorizer::getVectorValue(Valu
Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
assert(Vec->getType()->isVectorTy() && "Invalid type");
- SmallVector<Constant*, 8> ShuffleMask;
+ SmallVector<Constant *, 8> ShuffleMask;
for (unsigned i = 0; i < VF; ++i)
ShuffleMask.push_back(Builder.getInt32(VF - i - 1));
@@ -2518,10 +2499,10 @@ void InnerLoopVectorizer::vectorizeMemor
// scalarize the instruction.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
- bool CreateGatherScatter = !ConsecutiveStride &&
- ((LI && Legal->isLegalMaskedGather(ScalarDataTy)) ||
- (SI && Legal->isLegalMaskedScatter(ScalarDataTy)));
-
+ bool CreateGatherScatter =
+ !ConsecutiveStride && ((LI && Legal->isLegalMaskedGather(ScalarDataTy)) ||
+ (SI && Legal->isLegalMaskedScatter(ScalarDataTy)));
+
if (!ConsecutiveStride && !CreateGatherScatter)
return scalarizeInstruction(Instr);
@@ -2534,83 +2515,83 @@ void InnerLoopVectorizer::vectorizeMemor
if (ConsecutiveStride) {
if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
setDebugLocFromInst(Builder, Gep);
- Value *PtrOperand = Gep->getPointerOperand();
- Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
- FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(0, FirstBasePtr);
- Gep2->setName("gep.indvar.base");
- Ptr = Builder.Insert(Gep2);
- } else if (Gep) {
- setDebugLocFromInst(Builder, Gep);
- assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
- OrigLoop) &&
- "Base ptr must be invariant");
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
- unsigned InductionOperand = getGEPInductionOperand(Gep);
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-
- for (unsigned i = 0; i < NumOperands; ++i) {
- Value *GepOperand = Gep->getOperand(i);
- Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
-
- // Update last index or loop invariant instruction anchored in loop.
- if (i == InductionOperand ||
- (GepOperandInst && OrigLoop->contains(GepOperandInst))) {
- assert((i == InductionOperand ||
- PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst),
- OrigLoop)) &&
- "Must be last index or loop invariant");
-
- VectorParts &GEPParts = getVectorValue(GepOperand);
- Value *Index = GEPParts[0];
- Index = Builder.CreateExtractElement(Index, Zero);
- Gep2->setOperand(i, Index);
- Gep2->setName("gep.indvar.idx");
- }
+ Value *PtrOperand = Gep->getPointerOperand();
+ Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
+ FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(0, FirstBasePtr);
+ Gep2->setName("gep.indvar.base");
+ Ptr = Builder.Insert(Gep2);
+ } else if (Gep) {
+ setDebugLocFromInst(Builder, Gep);
+ assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
+ OrigLoop) &&
+ "Base ptr must be invariant");
+ // The last index does not have to be the induction. It can be
+ // consecutive and be a function of the index. For example, A[I+1].
+ unsigned NumOperands = Gep->getNumOperands();
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *GepOperand = Gep->getOperand(i);
+ Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
+
+ // Update last index or loop invariant instruction anchored in loop.
+ if (i == InductionOperand ||
+ (GepOperandInst && OrigLoop->contains(GepOperandInst))) {
+ assert((i == InductionOperand ||
+ PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst),
+ OrigLoop)) &&
+ "Must be last index or loop invariant");
+
+ VectorParts &GEPParts = getVectorValue(GepOperand);
+ Value *Index = GEPParts[0];
+ Index = Builder.CreateExtractElement(Index, Zero);
+ Gep2->setOperand(i, Index);
+ Gep2->setName("gep.indvar.idx");
}
- Ptr = Builder.Insert(Gep2);
- } else { // No GEP
- // Use the induction element ptr.
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
- setDebugLocFromInst(Builder, Ptr);
- VectorParts &PtrVal = getVectorValue(Ptr);
- Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
}
- } else {
- // At this point we should have the vector version of the GEP for Gather or Scatter
- assert(CreateGatherScatter && "The instruction should be scalarized");
- if (Gep) {
- SmallVector<VectorParts, 4> OpsV;
- // When vectorizing a GEP across UF parts, we want to keep each
- // loop-invariant base or index of the GEP scalar.
- for (Value *Op : Gep->operands()) {
- if (PSE.getSE()->isLoopInvariant(PSE.getSCEV(Op), OrigLoop))
- OpsV.push_back(VectorParts(UF, Op));
- else
- OpsV.push_back(getVectorValue(Op));
- }
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value*, 4> Ops;
- Value *GEPBasePtr = OpsV[0][Part];
- for (unsigned i = 1; i < Gep->getNumOperands(); i++)
- Ops.push_back(OpsV[i][Part]);
- Value *NewGep = Builder.CreateGEP(nullptr, GEPBasePtr, Ops,
- "VectorGep");
- assert(NewGep->getType()->isVectorTy() && "Expected vector GEP");
- NewGep = Builder.CreateBitCast(NewGep,
- VectorType::get(Ptr->getType(), VF));
- VectorGep.push_back(NewGep);
- }
- } else
- VectorGep = getVectorValue(Ptr);
+ Ptr = Builder.Insert(Gep2);
+ } else { // No GEP
+ // Use the induction element ptr.
+ assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+ setDebugLocFromInst(Builder, Ptr);
+ VectorParts &PtrVal = getVectorValue(Ptr);
+ Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
}
+ } else {
+ // At this point we should have the vector version of the GEP for Gather or Scatter
+ assert(CreateGatherScatter && "The instruction should be scalarized");
+ if (Gep) {
+ SmallVector<VectorParts, 4> OpsV;
+ // When vectorizing a GEP across UF parts, we want to keep each
+ // loop-invariant base or index of the GEP scalar.
+ for (Value *Op : Gep->operands()) {
+ if (PSE.getSE()->isLoopInvariant(PSE.getSCEV(Op), OrigLoop))
+ OpsV.push_back(VectorParts(UF, Op));
+ else
+ OpsV.push_back(getVectorValue(Op));
+ }
+
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Ops;
+ Value *GEPBasePtr = OpsV[0][Part];
+ for (unsigned i = 1; i < Gep->getNumOperands(); i++)
+ Ops.push_back(OpsV[i][Part]);
+ Value *NewGep =
+ Builder.CreateGEP(nullptr, GEPBasePtr, Ops, "VectorGep");
+ assert(NewGep->getType()->isVectorTy() && "Expected vector GEP");
+ NewGep =
+ Builder.CreateBitCast(NewGep, VectorType::get(Ptr->getType(), VF));
+ VectorGep.push_back(NewGep);
+ }
+ } else
+ VectorGep = getVectorValue(Ptr);
+ }
VectorParts Mask = createBlockInMask(Instr->getParent());
// Handle Stores:
@@ -2631,7 +2612,7 @@ void InnerLoopVectorizer::vectorizeMemor
} else {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
- Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If we store to reverse consecutive memory locations, then we need
@@ -2639,20 +2620,22 @@ void InnerLoopVectorizer::vectorizeMemor
StoredVal[Part] = reverseVector(StoredVal[Part]);
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
+ PartPtr =
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+ PartPtr =
+ Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
- Value *VecPtr = Builder.CreateBitCast(PartPtr,
- DataTy->getPointerTo(AddressSpace));
+ Value *VecPtr =
+ Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
if (Legal->isMaskRequired(SI))
NewSI = Builder.CreateMaskedStore(StoredVal[Part], VecPtr, Alignment,
Mask[Part]);
- else
- NewSI = Builder.CreateAlignedStore(StoredVal[Part], VecPtr,
- Alignment);
+ else
+ NewSI =
+ Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
}
addMetadata(NewSI, SI);
}
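(The two negative GEP offsets above are easiest to verify with concrete numbers; here is a small standalone model, with VF and UF as assumed example values:)

#include <cassert>

// The part pointer starts at Ptr + (-Part * VF) and then backs up
// another (1 - VF) elements, so the wide access for part P covers
// element indices [-(P + 1) * VF + 1, -P * VF] relative to Ptr.
int main() {
  const int VF = 4, UF = 2; // assumed example widths
  for (int Part = 0; Part < UF; ++Part) {
    int First = -Part * VF + (1 - VF); // lowest element index touched
    int Last = First + VF - 1;         // highest element index touched
    assert(First == -(Part + 1) * VF + 1);
    assert(Last == -Part * VF);
  }
  return 0;
}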
@@ -2663,16 +2646,16 @@ void InnerLoopVectorizer::vectorizeMemor
assert(LI && "Must have a load instruction");
setDebugLocFromInst(Builder, LI);
for (unsigned Part = 0; Part < UF; ++Part) {
- Instruction* NewLI;
+ Instruction *NewLI;
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;
- NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment,
- MaskPart, 0, "wide.masked.gather");
+ NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart,
+ 0, "wide.masked.gather");
Entry[Part] = NewLI;
} else {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
- Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If the address is consecutive but reversed, then the
@@ -2682,15 +2665,15 @@ void InnerLoopVectorizer::vectorizeMemor
Mask[Part] = reverseVector(Mask[Part]);
}
- Value *VecPtr = Builder.CreateBitCast(PartPtr,
- DataTy->getPointerTo(AddressSpace));
+ Value *VecPtr =
+ Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
if (Legal->isMaskRequired(LI))
NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
UndefValue::get(DataTy),
"wide.masked.load");
else
NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
- Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
+ Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
}
addMetadata(NewLI, LI);
}
@@ -2737,8 +2720,9 @@ void InnerLoopVectorizer::scalarizeInstr
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *UndefVec = IsVoidRetTy ? nullptr :
- UndefValue::get(VectorType::get(Instr->getType(), VF));
+ Value *UndefVec =
+ IsVoidRetTy ? nullptr
+ : UndefValue::get(VectorType::get(Instr->getType(), VF));
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
@@ -2791,8 +2775,8 @@ void InnerLoopVectorizer::scalarizeInstr
Builder.getInt32(Width));
// End if-block.
if (IfPredicateStore)
- PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
- Cmp));
+ PredicatedStores.push_back(
+ std::make_pair(cast<StoreInst>(Cloned), Cmp));
}
}
}
@@ -2866,9 +2850,8 @@ Value *InnerLoopVectorizer::getOrCreateT
if (TripCount->getType()->isPointerTy())
TripCount =
- CastInst::CreatePointerCast(TripCount, IdxTy,
- "exitcount.ptrcnt.to.int",
- L->getLoopPreheader()->getTerminator());
+ CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int",
+ L->getLoopPreheader()->getTerminator());
return TripCount;
}
@@ -2913,13 +2896,11 @@ void InnerLoopVectorizer::emitMinimumIte
// Generate code to check that the loop's trip count, which we computed by
// adding one to the backedge-taken count, will not overflow.
- Value *CheckMinIters =
- Builder.CreateICmpULT(Count,
- ConstantInt::get(Count->getType(), VF * UF),
- "min.iters.check");
+ Value *CheckMinIters = Builder.CreateICmpULT(
+ Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
- BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
- "min.iters.checked");
+ BasicBlock *NewBB =
+ BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked");
// Update dominator tree immediately if the generated block is a
// LoopBypassBlock because SCEV expansions to generate loop bypass
// checks may query it before the current function is finished.
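(In other words, the bypass fires when the scalar trip count cannot fill even one unrolled vector iteration; a minimal standalone model with assumed widths:)

#include <cassert>

// min.iters.check: bypass the vector loop when Count < VF * UF.
static bool needsBypass(unsigned Count, unsigned VF, unsigned UF) {
  return Count < VF * UF;
}

int main() {
  assert(needsBypass(7, 4, 2));  // 7 < 8: scalar loop only
  assert(!needsBypass(8, 4, 2)); // exactly one unrolled iteration fits
  return 0;
}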
@@ -2944,8 +2925,7 @@ void InnerLoopVectorizer::emitVectorLoop
// Generate code to check that the loop's trip count, which we computed by
// adding one to the backedge-taken count, will not overflow.
- BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
- "vector.ph");
+ BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
// Update dominator tree immediately if the generated block is a
// LoopBypassBlock because SCEV expansions to generate loop bypass
// checks may query it before the current function is finished.
@@ -2987,8 +2967,7 @@ void InnerLoopVectorizer::emitSCEVChecks
AddedSafetyChecks = true;
}
-void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
- BasicBlock *Bypass) {
+void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
BasicBlock *BB = L->getLoopPreheader();
// Generate the code that checks in runtime if arrays overlap. We put the
@@ -3022,7 +3001,6 @@ void InnerLoopVectorizer::emitMemRuntime
LVer->prepareNoAliasMetadata();
}
-
void InnerLoopVectorizer::createEmptyLoop() {
/*
In this function we generate a new loop. The new loop will contain
@@ -3080,12 +3058,12 @@ void InnerLoopVectorizer::createEmptyLoo
BasicBlock *VecBody =
VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
BasicBlock *MiddleBlock =
- VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
+ VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
BasicBlock *ScalarPH =
- MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
+ MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
// Create and register the new vector loop.
- Loop* Lp = new Loop();
+ Loop *Lp = new Loop();
Loop *ParentLoop = OrigLoop->getParentLoop();
// Insert the new loop into the loop nest and register the new basic blocks
@@ -3127,8 +3105,8 @@ void InnerLoopVectorizer::createEmptyLoo
Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
Constant *Step = ConstantInt::get(IdxTy, VF * UF);
Induction =
- createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
- getDebugLocFromInstOrOperands(OldInduction));
+ createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
+ getDebugLocFromInstOrOperands(OldInduction));
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
@@ -3148,18 +3126,16 @@ void InnerLoopVectorizer::createEmptyLoo
InductionDescriptor II = I->second;
// Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3,
- "bc.resume.val",
- ScalarPH->getTerminator());
+ PHINode *BCResumeVal = PHINode::Create(
+ OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
Value *EndValue;
if (OrigPhi == OldInduction) {
// We know what the end value is.
EndValue = CountRoundDown;
} else {
IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
- Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
- II.getStepValue()->getType(),
- "cast.crd");
+ Value *CRD = B.CreateSExtOrTrunc(
+ CountRoundDown, II.getStepValue()->getType(), "cast.crd");
EndValue = II.transform(B, CRD);
EndValue->setName("ind.end");
}
@@ -3181,9 +3157,9 @@ void InnerLoopVectorizer::createEmptyLoo
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
- Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
- CountRoundDown, "cmp.n",
- MiddleBlock->getTerminator());
+ Value *CmpN =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+ CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
ReplaceInstWithInst(MiddleBlock->getTerminator(),
BranchInst::Create(ExitBlock, ScalarPH, CmpN));
@@ -3238,9 +3214,7 @@ struct CSEDenseMapInfo {
/// = ...; " blocks. We start with one vectorized basic block. For every
/// conditional block we split this vectorized block. Therefore, every second
/// block will be a predicated one.
-static bool isPredicatedBlock(unsigned BlockNum) {
- return BlockNum % 2;
-}
+static bool isPredicatedBlock(unsigned BlockNum) { return BlockNum % 2; }
///\brief Perform cse of induction variable instructions.
static void cse(SmallVector<BasicBlock *, 4> &BBs) {
@@ -3274,7 +3248,7 @@ static void cse(SmallVector<BasicBlock *
/// \brief Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
- if (isa<FPMathOperator>(V)){
+ if (isa<FPMathOperator>(V)) {
FastMathFlags Flags;
Flags.setUnsafeAlgebra();
cast<Instruction>(V)->setFastMathFlags(Flags);
@@ -3397,8 +3371,8 @@ void InnerLoopVectorizer::truncateToMini
if (I->use_empty())
continue;
Type *OriginalTy = I->getType();
- Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(),
- KV.second);
+ Type *ScalarTruncatedTy =
+ IntegerType::get(OriginalTy->getContext(), KV.second);
Type *TruncatedTy = VectorType::get(ScalarTruncatedTy,
OriginalTy->getVectorNumElements());
if (TruncatedTy == OriginalTy)
@@ -3408,7 +3382,7 @@ void InnerLoopVectorizer::truncateToMini
continue;
IRBuilder<> B(cast<Instruction>(I));
- auto ShrinkOperand = [&](Value *V) -> Value* {
+ auto ShrinkOperand = [&](Value *V) -> Value * {
if (auto *ZI = dyn_cast<ZExtInst>(V))
if (ZI->getSrcTy() == TruncatedTy)
return ZI->getOperand(0);
@@ -3419,44 +3393,42 @@ void InnerLoopVectorizer::truncateToMini
// unfortunately.
Value *NewI = nullptr;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- NewI = B.CreateBinOp(BO->getOpcode(),
- ShrinkOperand(BO->getOperand(0)),
+ NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)),
ShrinkOperand(BO->getOperand(1)));
cast<BinaryOperator>(NewI)->copyIRFlags(I);
} else if (ICmpInst *CI = dyn_cast<ICmpInst>(I)) {
- NewI = B.CreateICmp(CI->getPredicate(),
- ShrinkOperand(CI->getOperand(0)),
- ShrinkOperand(CI->getOperand(1)));
+ NewI =
+ B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)),
+ ShrinkOperand(CI->getOperand(1)));
} else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
NewI = B.CreateSelect(SI->getCondition(),
ShrinkOperand(SI->getTrueValue()),
ShrinkOperand(SI->getFalseValue()));
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
switch (CI->getOpcode()) {
- default: llvm_unreachable("Unhandled cast!");
+ default:
+ llvm_unreachable("Unhandled cast!");
case Instruction::Trunc:
NewI = ShrinkOperand(CI->getOperand(0));
break;
case Instruction::SExt:
- NewI = B.CreateSExtOrTrunc(CI->getOperand(0),
- smallestIntegerVectorType(OriginalTy,
- TruncatedTy));
+ NewI = B.CreateSExtOrTrunc(
+ CI->getOperand(0),
+ smallestIntegerVectorType(OriginalTy, TruncatedTy));
break;
case Instruction::ZExt:
- NewI = B.CreateZExtOrTrunc(CI->getOperand(0),
- smallestIntegerVectorType(OriginalTy,
- TruncatedTy));
+ NewI = B.CreateZExtOrTrunc(
+ CI->getOperand(0),
+ smallestIntegerVectorType(OriginalTy, TruncatedTy));
break;
}
} else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements();
- auto *O0 =
- B.CreateZExtOrTrunc(SI->getOperand(0),
- VectorType::get(ScalarTruncatedTy, Elements0));
+ auto *O0 = B.CreateZExtOrTrunc(
+ SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0));
auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements();
- auto *O1 =
- B.CreateZExtOrTrunc(SI->getOperand(1),
- VectorType::get(ScalarTruncatedTy, Elements1));
+ auto *O1 = B.CreateZExtOrTrunc(
+ SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1));
NewI = B.CreateShuffleVector(O0, O1, SI->getMask());
} else if (isa<LoadInst>(I)) {
@@ -3525,8 +3497,8 @@ void InnerLoopVectorizer::vectorizeLoop(
DFS.perform(LI);
// Vectorize all of the blocks in the original loop.
- for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
- be = DFS.endRPO(); bb != be; ++bb)
+ for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO();
+ bb != be; ++bb)
vectorizeBlockInLoop(*bb, &PHIsToFix);
// Insert truncates and extends for any truncated instructions as hints to
@@ -3614,10 +3586,10 @@ void InnerLoopVectorizer::vectorizeLoop(
// Make sure to add the reduction stat value only to the
// first unroll part.
Value *StartVal = (part == 0) ? VectorStart : Identity;
- cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal,
- LoopVectorPreHeader);
- cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part],
- LoopVectorBody.back());
+ cast<PHINode>(VecRdxPhi[part])
+ ->addIncoming(StartVal, LoopVectorPreHeader);
+ cast<PHINode>(VecRdxPhi[part])
+ ->addIncoming(Val[part], LoopVectorBody.back());
}
// Before each round, move the insertion point right between
@@ -3675,21 +3647,19 @@ void InnerLoopVectorizer::vectorizeLoop(
assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = ReducedPartRdx;
- SmallVector<Constant*, 32> ShuffleMask(VF, nullptr);
+ SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.
- for (unsigned j = 0; j != i/2; ++j)
- ShuffleMask[j] = Builder.getInt32(i/2 + j);
+ for (unsigned j = 0; j != i / 2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i / 2 + j);
// Fill the rest of the mask with undef.
- std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
+ std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
UndefValue::get(Builder.getInt32Ty()));
- Value *Shuf =
- Builder.CreateShuffleVector(TmpVec,
- UndefValue::get(TmpVec->getType()),
- ConstantVector::get(ShuffleMask),
- "rdx.shuf");
+ Value *Shuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()),
+ ConstantVector::get(ShuffleMask), "rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
// Floating point operations had to be 'fast' to enable the reduction.
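(The shuffle sequence above halves the live vector each round; a scalar standalone model of the same reduction tree, with an assumed VF = 8 and invented input:)

#include <cassert>
#include <cstddef>
#include <vector>

// Each step combines the upper half into the lower half, so lane 0
// holds the full sum after log2(VF) steps.
int main() {
  std::vector<int> V = {1, 2, 3, 4, 5, 6, 7, 8}; // assumed input, VF = 8
  for (std::size_t i = V.size(); i != 1; i >>= 1)
    for (std::size_t j = 0; j != i / 2; ++j)
      V[j] += V[i / 2 + j]; // lower half += shifted-down upper half
  assert(V[0] == 36);       // 1 + 2 + ... + 8
  return 0;
}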
@@ -3701,8 +3671,8 @@ void InnerLoopVectorizer::vectorizeLoop(
}
// The result is in the first element of the vector.
- ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
- Builder.getInt32(0));
+ ReducedPartRdx =
+ Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
@@ -3726,9 +3696,11 @@ void InnerLoopVectorizer::vectorizeLoop(
// We know that the loop is in LCSSA form. We need to update the
// PHI nodes in the exit blocks.
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
- LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+ LEE = LoopExitBlock->end();
+ LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) break;
+ if (!LCSSAPhi)
+ break;
// All PHINodes need to have a single entry edge, or two if
// we already fixed them.
@@ -3741,7 +3713,7 @@ void InnerLoopVectorizer::vectorizeLoop(
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
- }// end of the LCSSA phi scan.
+ } // end of the LCSSA phi scan.
// Fix the scalar loop reduction variable with the incoming reduction sum
// from the vector body and from the backedge value.
@@ -3928,9 +3900,11 @@ void InnerLoopVectorizer::fixFirstOrderR
void InnerLoopVectorizer::fixLCSSAPHIs() {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
- LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+ LEE = LoopExitBlock->end();
+ LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) break;
+ if (!LCSSAPhi)
+ break;
if (LCSSAPhi->getNumIncomingValues() == 1)
LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
LoopMiddleBlock);
@@ -3943,7 +3917,7 @@ InnerLoopVectorizer::createEdgeMask(Basi
"Invalid edge");
// Look for cached value.
- std::pair<BasicBlock*, BasicBlock*> Edge(Src, Dst);
+ std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge);
if (ECEntryIt != MaskCache.end())
return ECEntryIt->second;
@@ -3999,13 +3973,13 @@ InnerLoopVectorizer::createBlockInMask(B
void InnerLoopVectorizer::widenPHIInstruction(
Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF,
unsigned VF, PhiVector *PV) {
- PHINode* P = cast<PHINode>(PN);
+ PHINode *P = cast<PHINode>(PN);
// Handle recurrences.
if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
for (unsigned part = 0; part < UF; ++part) {
// This is phase one of vectorizing PHIs.
- Type *VecTy = (VF == 1) ? PN->getType() :
- VectorType::get(PN->getType(), VF);
+ Type *VecTy =
+ (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
Entry[part] = PHINode::Create(
VecTy, 2, "vec.phi", &*LoopVectorBody.back()->getFirstInsertionPt());
}
@@ -4030,21 +4004,20 @@ void InnerLoopVectorizer::widenPHIInstru
// SELECT(Mask2, In2,
// ( ...)))
for (unsigned In = 0; In < NumIncoming; In++) {
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
- P->getParent());
+ VectorParts Cond =
+ createEdgeMask(P->getIncomingBlock(In), P->getParent());
VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
for (unsigned part = 0; part < UF; ++part) {
// We might have single edge PHIs (blocks) - use an identity
// 'select' for the first PHI operand.
if (In == 0)
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
- In0[part]);
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In0[part]);
else
// Select between the current value and the previous incoming edge
// based on the incoming mask.
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
- Entry[part], "predphi");
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part], Entry[part],
+ "predphi");
}
}
return;
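(The nested selects described in the comment above blend one incoming value per edge mask; a per-lane standalone model with invented masks and values:)

#include <cassert>

// In0 enters through an identity select; each later incoming value
// folds in via SELECT(Mask_k, In_k, previous), the "predphi" pattern.
int main() {
  const int NumIncoming = 3;
  bool Mask[NumIncoming] = {false, true, false}; // assumed edge masks
  int In[NumIncoming] = {10, 20, 30};            // assumed incoming values
  int Blended = In[0];                           // identity select for In0
  for (int k = 1; k < NumIncoming; ++k)
    Blended = Mask[k] ? In[k] : Blended;
  assert(Blended == 20); // this lane arrived through the In[1] edge
  return 0;
}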
@@ -4052,67 +4025,64 @@ void InnerLoopVectorizer::widenPHIInstru
// This PHINode must be an induction variable.
// Make sure that we know about it.
- assert(Legal->getInductionVars()->count(P) &&
- "Not an induction variable");
+ assert(Legal->getInductionVars()->count(P) && "Not an induction variable");
InductionDescriptor II = Legal->getInductionVars()->lookup(P);
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
switch (II.getKind()) {
- case InductionDescriptor::IK_NoInduction:
- llvm_unreachable("Unknown induction");
- case InductionDescriptor::IK_IntInduction: {
- assert(P->getType() == II.getStartValue()->getType() &&
- "Types must match");
- // Handle other induction variables that are now based on the
- // canonical one.
- Value *V = Induction;
- if (P != OldInduction) {
- V = Builder.CreateSExtOrTrunc(Induction, P->getType());
- V = II.transform(Builder, V);
- V->setName("offset.idx");
- }
- Value *Broadcasted = getBroadcastInstrs(V);
- // After broadcasting the induction variable we need to make the vector
- // consecutive by adding 0, 1, 2, etc.
- for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
- return;
- }
- case InductionDescriptor::IK_PtrInduction:
- // Handle the pointer induction variable case.
- assert(P->getType()->isPointerTy() && "Unexpected type.");
- // This is the normalized GEP that starts counting at zero.
- Value *PtrInd = Induction;
- PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
- // This is the vector of results. Notice that we don't generate
- // vector geps because scalar geps result in better code.
- for (unsigned part = 0; part < UF; ++part) {
- if (VF == 1) {
- int EltIndex = part;
- Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
- Value *SclrGep = II.transform(Builder, GlobalIdx);
- SclrGep->setName("next.gep");
- Entry[part] = SclrGep;
- continue;
- }
+ case InductionDescriptor::IK_NoInduction:
+ llvm_unreachable("Unknown induction");
+ case InductionDescriptor::IK_IntInduction: {
+ assert(P->getType() == II.getStartValue()->getType() && "Types must match");
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *V = Induction;
+ if (P != OldInduction) {
+ V = Builder.CreateSExtOrTrunc(Induction, P->getType());
+ V = II.transform(Builder, V);
+ V->setName("offset.idx");
+ }
+ Value *Broadcasted = getBroadcastInstrs(V);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding 0, 1, 2, etc.
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
+ return;
+ }
+ case InductionDescriptor::IK_PtrInduction:
+ // Handle the pointer induction variable case.
+ assert(P->getType()->isPointerTy() && "Unexpected type.");
+ // This is the normalized GEP that starts counting at zero.
+ Value *PtrInd = Induction;
+ PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
+ // This is the vector of results. Notice that we don't generate
+ // vector geps because scalar geps result in better code.
+ for (unsigned part = 0; part < UF; ++part) {
+ if (VF == 1) {
+ int EltIndex = part;
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
+ Value *SclrGep = II.transform(Builder, GlobalIdx);
+ SclrGep->setName("next.gep");
+ Entry[part] = SclrGep;
+ continue;
+ }
- Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
- for (unsigned int i = 0; i < VF; ++i) {
- int EltIndex = i + part * VF;
- Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
- Value *SclrGep = II.transform(Builder, GlobalIdx);
- SclrGep->setName("next.gep");
- VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
- Builder.getInt32(i),
- "insert.gep");
- }
- Entry[part] = VecVal;
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ int EltIndex = i + part * VF;
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
+ Value *SclrGep = II.transform(Builder, GlobalIdx);
+ SclrGep->setName("next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i), "insert.gep");
}
- return;
+ Entry[part] = VecVal;
+ }
+ return;
}
}
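(Each scalar "next.gep" above uses a plain integer index rather than a vector GEP; a standalone sketch of the index layout, with assumed example widths:)

#include <cstdio>

// Lane i of unroll part Part uses global index i + Part * VF.
int main() {
  const unsigned VF = 4, UF = 2; // assumed example widths
  for (unsigned Part = 0; Part < UF; ++Part)
    for (unsigned i = 0; i < VF; ++i)
      std::printf("part %u lane %u -> index %u\n", Part, i, i + Part * VF);
  return 0;
}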
@@ -4130,7 +4100,7 @@ void InnerLoopVectorizer::vectorizeBlock
// Vectorize PHINodes.
widenPHIInstruction(&*it, Entry, UF, VF, PV);
continue;
- }// End of PHI.
+ } // End of PHI.
case Instruction::Add:
case Instruction::FAdd:
@@ -4183,17 +4153,17 @@ void InnerLoopVectorizer::vectorizeBlock
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
VectorParts &Cond = getVectorValue(it->getOperand(0));
- VectorParts &Op0 = getVectorValue(it->getOperand(1));
- VectorParts &Op1 = getVectorValue(it->getOperand(2));
+ VectorParts &Op0 = getVectorValue(it->getOperand(1));
+ VectorParts &Op1 = getVectorValue(it->getOperand(2));
- Value *ScalarCond = (VF == 1) ? Cond[0] :
- Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
+ Value *ScalarCond =
+ (VF == 1)
+ ? Cond[0]
+ : Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part] = Builder.CreateSelect(
- InvariantCond ? ScalarCond : Cond[Part],
- Op0[Part],
- Op1[Part]);
+ InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);
}
addMetadata(Entry, &*it);
@@ -4226,7 +4196,7 @@ void InnerLoopVectorizer::vectorizeBlock
case Instruction::Store:
case Instruction::Load:
vectorizeMemoryInstruction(&*it);
- break;
+ break;
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@@ -4247,8 +4217,8 @@ void InnerLoopVectorizer::vectorizeBlock
/// c. other casts depend on pointer size.
if (CI->getOperand(0) == OldInduction &&
it->getOpcode() == Instruction::Trunc) {
- Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
- CI->getType());
+ Value *ScalarCast =
+ Builder.CreateCast(CI->getOpcode(), Induction, CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
InductionDescriptor II =
Legal->getInductionVars()->lookup(OldInduction);
@@ -4260,8 +4230,8 @@ void InnerLoopVectorizer::vectorizeBlock
break;
}
/// Vectorize casts.
- Type *DestTy = (VF == 1) ? CI->getType() :
- VectorType::get(CI->getType(), VF);
+ Type *DestTy =
+ (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
@@ -4287,9 +4257,8 @@ void InnerLoopVectorizer::vectorizeBlock
Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- if (ID &&
- (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
- ID == Intrinsic::lifetime_start)) {
+ if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
+ ID == Intrinsic::lifetime_start)) {
scalarizeInstruction(&*it);
break;
}
@@ -4358,8 +4327,8 @@ void InnerLoopVectorizer::vectorizeBlock
// All other instructions are unsupported. Scalarize them.
scalarizeInstruction(&*it);
break;
- }// end of switch.
- }// end of for_each instr.
+ } // end of switch.
+ } // end of for_each instr.
}
void InnerLoopVectorizer::updateAnalysis() {
@@ -4413,7 +4382,8 @@ bool LoopVectorizationLegality::canVecto
// Collect safe addresses.
for (Loop::block_iterator BI = TheLoop->block_begin(),
- BE = TheLoop->block_end(); BI != BE; ++BI) {
+ BE = TheLoop->block_end();
+ BI != BE; ++BI) {
BasicBlock *BB = *BI;
if (blockNeedsPredication(BB))
@@ -4430,7 +4400,8 @@ bool LoopVectorizationLegality::canVecto
// Collect the blocks that need predication.
BasicBlock *Header = TheLoop->getHeader();
for (Loop::block_iterator BI = TheLoop->block_begin(),
- BE = TheLoop->block_end(); BI != BE; ++BI) {
+ BE = TheLoop->block_end();
+ BI != BE; ++BI) {
BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
@@ -4462,9 +4433,8 @@ bool LoopVectorizationLegality::canVecto
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!TheLoop->getLoopPreheader()) {
- emitAnalysis(
- VectorizationReport() <<
- "loop control flow is not understood by vectorizer");
+ emitAnalysis(VectorizationReport()
+ << "loop control flow is not understood by vectorizer");
return false;
}
@@ -4476,17 +4446,15 @@ bool LoopVectorizationLegality::canVecto
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
- emitAnalysis(
- VectorizationReport() <<
- "loop control flow is not understood by vectorizer");
+ emitAnalysis(VectorizationReport()
+ << "loop control flow is not understood by vectorizer");
return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
- emitAnalysis(
- VectorizationReport() <<
- "loop control flow is not understood by vectorizer");
+ emitAnalysis(VectorizationReport()
+ << "loop control flow is not understood by vectorizer");
return false;
}
@@ -4494,15 +4462,14 @@ bool LoopVectorizationLegality::canVecto
// checked at the end of each iteration. With that we can assume that all
// instructions in the loop are executed the same number of times.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
- emitAnalysis(
- VectorizationReport() <<
- "loop control flow is not understood by vectorizer");
+ emitAnalysis(VectorizationReport()
+ << "loop control flow is not understood by vectorizer");
return false;
}
// We need to have a loop header.
- DEBUG(dbgs() << "LV: Found a loop: " <<
- TheLoop->getHeader()->getName() << '\n');
+ DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
+ << '\n');
// Check if we can if-convert non-single-bb loops.
unsigned NumBlocks = TheLoop->getNumBlocks();
@@ -4581,7 +4548,7 @@ static Type *convertPointerToIntegerType
return Ty;
}
-static Type* getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
+static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
Ty0 = convertPointerToIntegerType(DL, Ty0);
Ty1 = convertPointerToIntegerType(DL, Ty1);
if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
@@ -4596,7 +4563,7 @@ static bool hasOutsideLoopUser(const Loo
// Reduction instructions are allowed to have exit users. All other
// instructions must not have external users.
if (!Reductions.count(Inst))
- //Check that all of the users of the loop are inside the BB.
+ // Check that all of the users of the loop are inside the BB.
for (User *U : Inst->users()) {
Instruction *UI = cast<Instruction>(U);
// This user may be a reduction exit value.
@@ -4619,7 +4586,8 @@ bool LoopVectorizationLegality::canVecto
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
- be = TheLoop->block_end(); bb != be; ++bb) {
+ be = TheLoop->block_end();
+ bb != be; ++bb) {
// Scan the instructions in the block and look for hazards.
for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
@@ -4628,8 +4596,7 @@ bool LoopVectorizationLegality::canVecto
if (PHINode *Phi = dyn_cast<PHINode>(it)) {
Type *PhiTy = Phi->getType();
// Check that this PHI type is allowed.
- if (!PhiTy->isIntegerTy() &&
- !PhiTy->isFloatingPointTy() &&
+ if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
emitAnalysis(VectorizationReport(&*it)
<< "loop control flow is not understood by vectorizer");
@@ -4645,9 +4612,9 @@ bool LoopVectorizationLegality::canVecto
// identified reduction value with an outside user.
if (!hasOutsideLoopUser(TheLoop, &*it, AllowedExit))
continue;
- emitAnalysis(VectorizationReport(&*it) <<
- "value could not be identified as "
- "an induction or reduction variable");
+ emitAnalysis(VectorizationReport(&*it)
+ << "value could not be identified as "
+ "an induction or reduction variable");
return false;
}
@@ -4670,9 +4637,8 @@ bool LoopVectorizationLegality::canVecto
// Int inductions are special because we only allow one IV.
if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
- ID.getStepValue()->isOne() &&
- isa<Constant>(ID.getStartValue()) &&
- cast<Constant>(ID.getStartValue())->isNullValue()) {
+ ID.getStepValue()->isOne() && isa<Constant>(ID.getStartValue()) &&
+ cast<Constant>(ID.getStartValue())->isNullValue()) {
// Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other
// than it is expedient). We've checked that it begins at zero and
@@ -4686,9 +4652,9 @@ bool LoopVectorizationLegality::canVecto
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
- emitAnalysis(VectorizationReport(&*it) <<
- "use of induction value outside of the "
- "loop is not handled by vectorizer");
+ emitAnalysis(VectorizationReport(&*it)
+ << "use of induction value outside of the "
+ "loop is not handled by vectorizer");
return false;
}
@@ -4709,19 +4675,20 @@ bool LoopVectorizationLegality::canVecto
continue;
}
- emitAnalysis(VectorizationReport(&*it) <<
- "value that could not be identified as "
- "reduction is used outside the loop");
- DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+ emitAnalysis(VectorizationReport(&*it)
+ << "value that could not be identified as "
+ "reduction is used outside the loop");
+ DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
return false;
- }// end of PHI handling
+ } // end of PHI handling
// We handle calls that:
// * Are debug info intrinsics.
// * Have a mapping to an IR intrinsic.
// * Have a vector version available.
CallInst *CI = dyn_cast<CallInst>(it);
- if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) &&
+ if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
+ !isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
emitAnalysis(VectorizationReport(&*it)
@@ -4732,8 +4699,8 @@ bool LoopVectorizationLegality::canVecto
// Intrinsics such as powi, cttz and ctlz are legal to vectorize if the
// second argument is the same (i.e., loop invariant).
- if (CI &&
- hasVectorInstrinsicScalarOpd(getVectorIntrinsicIDForCall(CI, TLI), 1)) {
+ if (CI && hasVectorInstrinsicScalarOpd(
+ getVectorIntrinsicIDForCall(CI, TLI), 1)) {
auto *SE = PSE.getSE();
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
emitAnalysis(VectorizationReport(&*it)
@@ -4746,7 +4713,8 @@ bool LoopVectorizationLegality::canVecto
// Check that the instruction return type is vectorizable.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
- !it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+ !it->getType()->isVoidTy()) ||
+ isa<ExtractElementInst>(it)) {
emitAnalysis(VectorizationReport(&*it)
<< "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
@@ -4757,8 +4725,8 @@ bool LoopVectorizationLegality::canVecto
if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T)) {
- emitAnalysis(VectorizationReport(ST) <<
- "store instruction cannot be vectorized");
+ emitAnalysis(VectorizationReport(ST)
+ << "store instruction cannot be vectorized");
return false;
}
if (EnableMemAccessVersioning)
@@ -4768,14 +4736,13 @@ bool LoopVectorizationLegality::canVecto
if (EnableMemAccessVersioning)
collectStridedAccess(LI);
- // FP instructions can allow unsafe algebra, thus vectorizable by
- // non-IEEE-754 compliant SIMD units.
- // This applies to floating-point math operations and calls, not memory
- // operations, shuffles, or casts, as they don't change precision or
- // semantics.
+ // FP instructions can allow unsafe algebra, thus vectorizable by
+ // non-IEEE-754 compliant SIMD units.
+ // This applies to floating-point math operations and calls, not memory
+ // operations, shuffles, or casts, as they don't change precision or
+ // semantics.
} else if (it->getType()->isFloatingPointTy() &&
- (CI || it->isBinaryOp()) &&
- !it->hasUnsafeAlgebra()) {
+ (CI || it->isBinaryOp()) && !it->hasUnsafeAlgebra()) {
DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
Hints->setPotentiallyUnsafe();
}
@@ -4783,13 +4750,12 @@ bool LoopVectorizationLegality::canVecto
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
- emitAnalysis(VectorizationReport(&*it) <<
- "value cannot be used outside the loop");
+ emitAnalysis(VectorizationReport(&*it)
+ << "value cannot be used outside the loop");
return false;
}
} // next instr.
-
}
if (!Induction) {
@@ -4832,7 +4798,7 @@ void LoopVectorizationLegality::collectS
void LoopVectorizationLegality::collectLoopUniforms() {
// We now know that the loop is vectorizable!
// Collect variables that will remain uniform after vectorization.
- std::vector<Value*> Worklist;
+ std::vector<Value *> Worklist;
BasicBlock *Latch = TheLoop->getLoopLatch();
// Start with the conditional branch and walk up the block.
@@ -4842,9 +4808,9 @@ void LoopVectorizationLegality::collectL
// after vectorization (and subsequent cleanup) and, until revectorization is
// supported, all dependencies must also be uniform.
for (Loop::block_iterator B = TheLoop->block_begin(),
- BE = TheLoop->block_end(); B != BE; ++B)
- for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end();
- I != IE; ++I)
+ BE = TheLoop->block_end();
+ B != BE; ++B)
+ for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end(); I != IE; ++I)
if (I->getType()->isPointerTy() && isConsecutivePtr(&*I))
Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
@@ -4889,7 +4855,7 @@ bool LoopVectorizationLegality::canVecto
}
bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
- Value *In0 = const_cast<Value*>(V);
+ Value *In0 = const_cast<Value *>(V);
PHINode *PN = dyn_cast_or_null<PHINode>(In0);
if (!PN)
return false;
@@ -4901,12 +4867,12 @@ bool LoopVectorizationLegality::isFirstO
return FirstOrderRecurrences.count(Phi);
}
-bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
-bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
- SmallPtrSetImpl<Value *> &SafePtrs) {
+bool LoopVectorizationLegality::blockCanBePredicated(
+ BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
@@ -4963,7 +4929,8 @@ bool LoopVectorizationLegality::blockCan
// The instructions below can trap.
switch (it->getOpcode()) {
- default: continue;
+ default:
+ continue;
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
@@ -5157,20 +5124,22 @@ void InterleavedAccessInfo::analyzeInter
LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// Width 1 means no vectorization.
- VectorizationFactor Factor = { 1U, 0U };
+ VectorizationFactor Factor = {1U, 0U};
if (OptForSize && Legal->getRuntimePointerChecking()->Need) {
- emitAnalysis(VectorizationReport() <<
- "runtime pointer checks needed. Enable vectorization of this "
- "loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os/-Oz");
- DEBUG(dbgs() <<
- "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
+ emitAnalysis(
+ VectorizationReport()
+ << "runtime pointer checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz");
+ DEBUG(dbgs()
+ << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
return Factor;
}
if (!EnableCondStoresVectorization && Legal->getNumPredStores()) {
- emitAnalysis(VectorizationReport() <<
- "store that is conditionally executed prevents vectorization");
+ emitAnalysis(
+ VectorizationReport()
+ << "store that is conditionally executed prevents vectorization");
DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
return Factor;
}
@@ -5186,14 +5155,14 @@ LoopVectorizationCostModel::selectVector
unsigned MaxSafeDepDist = -1U;
if (Legal->getMaxSafeDepDistBytes() != -1U)
MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
- WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
- WidestRegister : MaxSafeDepDist);
+ WidestRegister =
+ ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / "
<< WidestType << " bits.\n");
- DEBUG(dbgs() << "LV: The Widest register is: "
- << WidestRegister << " bits.\n");
+ DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister
+ << " bits.\n");
if (MaxVectorSize == 0) {
DEBUG(dbgs() << "LV: The target has no vector registers.\n");
@@ -5201,7 +5170,7 @@ LoopVectorizationCostModel::selectVector
}
assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements"
- " into one vector!");
+ " into one vector!");
unsigned VF = MaxVectorSize;
if (MaximizeBandwidth && !OptForSize) {
@@ -5229,9 +5198,9 @@ LoopVectorizationCostModel::selectVector
if (OptForSize) {
// If we are unable to calculate the trip count then don't try to vectorize.
if (TC < 2) {
- emitAnalysis
- (VectorizationReport() <<
- "unable to calculate the loop count due to complex control flow");
+ emitAnalysis(
+ VectorizationReport()
+ << "unable to calculate the loop count due to complex control flow");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
return Factor;
}
@@ -5244,11 +5213,11 @@ LoopVectorizationCostModel::selectVector
else {
// If the trip count that we found modulo the vectorization factor is not
// zero then we require a tail.
- emitAnalysis(VectorizationReport() <<
- "cannot optimize for size and vectorize at the "
- "same time. Enable vectorization of this loop "
- "with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os/-Oz");
+ emitAnalysis(VectorizationReport()
+ << "cannot optimize for size and vectorize at the "
+ "same time. Enable vectorization of this loop "
+ "with '#pragma clang loop vectorize(enable)' "
+ "when compiling with -Os/-Oz");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
return Factor;
}
@@ -5277,17 +5246,18 @@ LoopVectorizationCostModel::selectVector
Cost = expectedCost(Width).first / (float)Width;
}
- for (unsigned i=2; i <= VF; i*=2) {
+ for (unsigned i = 2; i <= VF; i *= 2) {
// Notice that the vector loop needs to be executed less times, so
// we need to divide the cost of the vector loops by the width of
// the vector elements.
VectorizationCostTy C = expectedCost(i);
float VectorCost = C.first / (float)i;
- DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
- (int)VectorCost << ".\n");
+ DEBUG(dbgs() << "LV: Vector loop of width " << i
+ << " costs: " << (int)VectorCost << ".\n");
if (!C.second && !ForceVectorization) {
- DEBUG(dbgs() << "LV: Not considering vector loop of width " << i <<
- " because it will not generate any vector instructions.\n");
+ DEBUG(
+ dbgs() << "LV: Not considering vector loop of width " << i
+ << " because it will not generate any vector instructions.\n");
continue;
}
if (VectorCost < Cost) {
@@ -5299,7 +5269,7 @@ LoopVectorizationCostModel::selectVector
DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs()
<< "LV: Vectorization seems to be not beneficial, "
<< "but was forced by a user.\n");
- DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n");
+ DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n");
Factor.Width = Width;
Factor.Cost = Width * Cost;
return Factor;
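(The loop above compares widths by cost per lane, since a width-W loop runs W times fewer iterations; a standalone model with invented costs:)

#include <cassert>

int main() {
  const unsigned Widths[] = {1, 2, 4, 8};
  const float TotalCost[] = {10.f, 12.f, 16.f, 40.f}; // invented costs
  unsigned Best = Widths[0];
  float BestCost = TotalCost[0] / Widths[0];
  for (int k = 1; k < 4; ++k) {
    float PerLane = TotalCost[k] / Widths[k]; // fewer, wider iterations
    if (PerLane < BestCost) {
      BestCost = PerLane;
      Best = Widths[k];
    }
  }
  assert(Best == 4); // per-lane costs come out to 10, 6, 4, 5
  return 0;
}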
@@ -5313,7 +5283,8 @@ LoopVectorizationCostModel::getSmallestA
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
- be = TheLoop->block_end(); bb != be; ++bb) {
+ be = TheLoop->block_end();
+ bb != be; ++bb) {
BasicBlock *BB = *bb;
// For each instruction in the loop.
@@ -5389,8 +5360,8 @@ unsigned LoopVectorizationCostModel::sel
return 1;
unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
- DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters <<
- " registers\n");
+ DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
+ << " registers\n");
if (VF == 1) {
if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
@@ -5479,8 +5450,7 @@ unsigned LoopVectorizationCostModel::sel
// by this point), we can increase the critical path length if the loop
// we're interleaving is inside another loop. Limit, by default to 2, so the
// critical path only gets increased by one reduction operation.
- if (Legal->getReductionVars()->size() &&
- TheLoop->getLoopDepth() > 1) {
+ if (Legal->getReductionVars()->size() && TheLoop->getLoopDepth() > 1) {
unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC);
SmallIC = std::min(SmallIC, F);
StoresIC = std::min(StoresIC, F);
@@ -5537,20 +5507,20 @@ LoopVectorizationCostModel::calculateReg
// Each 'key' in the map opens a new interval. The values
// of the map are the index of the 'last seen' usage of the
// instruction that is the key.
- typedef DenseMap<Instruction*, unsigned> IntervalMap;
+ typedef DenseMap<Instruction *, unsigned> IntervalMap;
// Maps instruction to its index.
- DenseMap<unsigned, Instruction*> IdxToInstr;
+ DenseMap<unsigned, Instruction *> IdxToInstr;
// Marks the end of each interval.
IntervalMap EndPoint;
// Saves the list of instruction indices that are used in the loop.
- SmallSet<Instruction*, 8> Ends;
+ SmallSet<Instruction *, 8> Ends;
// Saves the list of values that are used in the loop but are
// defined outside the loop, such as arguments and constants.
- SmallPtrSet<Value*, 8> LoopInvariants;
+ SmallPtrSet<Value *, 8> LoopInvariants;
unsigned Index = 0;
- for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
- be = DFS.endRPO(); bb != be; ++bb) {
+ for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO();
+ bb != be; ++bb) {
RU.NumInstructions += (*bb)->size();
for (Instruction &I : **bb) {
IdxToInstr[Index++] = &I;
@@ -5561,7 +5531,8 @@ LoopVectorizationCostModel::calculateReg
Instruction *Instr = dyn_cast<Instruction>(U);
// Ignore non-instruction values such as arguments, constants, etc.
- if (!Instr) continue;
+ if (!Instr)
+ continue;
// If this instruction is outside the loop then record it and continue.
if (!TheLoop->contains(Instr)) {
@@ -5577,15 +5548,15 @@ LoopVectorizationCostModel::calculateReg
}
// Saves the list of intervals that end with the index in 'key'.
- typedef SmallVector<Instruction*, 2> InstrList;
+ typedef SmallVector<Instruction *, 2> InstrList;
DenseMap<unsigned, InstrList> TransposeEnds;
// Transpose the EndPoints to a list of values that end at each index.
- for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end();
- it != e; ++it)
+ for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end(); it != e;
+ ++it)
TransposeEnds[it->second].push_back(it->first);
- SmallSet<Instruction*, 8> OpenIntervals;
+ SmallSet<Instruction *, 8> OpenIntervals;
// Get the size of the widest register.
unsigned MaxSafeDepDist = -1U;
@@ -5611,7 +5582,8 @@ LoopVectorizationCostModel::calculateReg
for (unsigned int i = 0; i < Index; ++i) {
Instruction *I = IdxToInstr[i];
// Ignore instructions that are never used within the loop.
- if (!Ends.count(I)) continue;
+ if (!Ends.count(I))
+ continue;
// Skip ignored values.
if (ValuesToIgnore.count(I))
@@ -5652,7 +5624,7 @@ LoopVectorizationCostModel::calculateReg
Invariant += GetRegUsage(Inst->getType(), VFs[i]);
}
- DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
+ DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n');
@@ -5671,7 +5643,8 @@ LoopVectorizationCostModel::expectedCost
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
- be = TheLoop->block_end(); bb != be; ++bb) {
+ be = TheLoop->block_end();
+ bb != be; ++bb) {
VectorizationCostTy BlockCost;
BasicBlock *BB = *bb;
@@ -5693,8 +5666,8 @@ LoopVectorizationCostModel::expectedCost
BlockCost.first += C.first;
BlockCost.second |= C.second;
- DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first <<
- " for VF " << VF << " For instruction: " << *it << '\n');
+ DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first << " for VF "
+ << VF << " For instruction: " << *it << '\n');
}
// We assume that if-converted blocks have a 50% chance of being executed.
@@ -5720,7 +5693,7 @@ static bool isGatherOrScatterLegal(Instr
LoopVectorizationLegality *Legal) {
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
return (isa<LoadInst>(I) && Legal->isLegalMaskedGather(DataTy)) ||
- (isa<StoreInst>(I) && Legal->isLegalMaskedScatter(DataTy));
+ (isa<StoreInst>(I) && Legal->isLegalMaskedScatter(DataTy));
}
/// \brief Check whether the address computation for a non-consecutive memory
@@ -5791,14 +5764,14 @@ LoopVectorizationCostModel::getInstructi
Type *VectorTy;
unsigned C = getInstructionCost(I, VF, VectorTy);
- bool TypeNotScalarized = VF > 1 && !VectorTy->isVoidTy() &&
- TTI.getNumberOfParts(VectorTy) < VF;
+ bool TypeNotScalarized =
+ VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF;
return VectorizationCostTy(C, TypeNotScalarized);
}
-unsigned
-LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
- Type *&VectorTy) {
+unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
+ unsigned VF,
+ Type *&VectorTy) {
Type *RetTy = I->getType();
if (VF > 1 && MinBWs.count(I))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
@@ -5850,9 +5823,9 @@ LoopVectorizationCostModel::getInstructi
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
TargetTransformInfo::OperandValueKind Op1VK =
- TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueProperties Op1VP =
TargetTransformInfo::OP_None;
TargetTransformInfo::OperandValueProperties Op2VP =
@@ -5903,28 +5876,27 @@ LoopVectorizationCostModel::getInstructi
case Instruction::Load: {
StoreInst *SI = dyn_cast<StoreInst>(I);
LoadInst *LI = dyn_cast<LoadInst>(I);
- Type *ValTy = (SI ? SI->getValueOperand()->getType() :
- LI->getType());
+ Type *ValTy = (SI ? SI->getValueOperand()->getType() : LI->getType());
VectorTy = ToVectorTy(ValTy, VF);
unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment();
- unsigned AS = SI ? SI->getPointerAddressSpace() :
- LI->getPointerAddressSpace();
+ unsigned AS =
+ SI ? SI->getPointerAddressSpace() : LI->getPointerAddressSpace();
Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
// We add the cost of address computation here instead of with the gep
// instruction because only here we know whether the operation is
// scalarized.
if (VF == 1)
return TTI.getAddressComputationCost(VectorTy) +
- TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+ TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
if (LI && Legal->isUniform(Ptr)) {
// Scalar load + broadcast
unsigned Cost = TTI.getAddressComputationCost(ValTy->getScalarType());
Cost += TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
- return Cost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
- ValTy);
+ return Cost +
+ TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, ValTy);
}
// For an interleaved access, calculate the total cost of the whole
@@ -5969,8 +5941,8 @@ LoopVectorizationCostModel::getInstructi
// Scalarized loads/stores.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
- bool UseGatherOrScatter = (ConsecutiveStride == 0) &&
- isGatherOrScatterLegal(I, Ptr, Legal);
+ bool UseGatherOrScatter =
+ (ConsecutiveStride == 0) && isGatherOrScatterLegal(I, Ptr, Legal);
bool Reverse = ConsecutiveStride < 0;
const DataLayout &DL = I->getModule()->getDataLayout();
@@ -5979,7 +5951,7 @@ LoopVectorizationCostModel::getInstructi
if ((!ConsecutiveStride && !UseGatherOrScatter) ||
ScalarAllocatedSize != VectorElementSize) {
bool IsComplexComputation =
- isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+ isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
unsigned Cost = 0;
// The cost of extracting from the value vector and pointer vector.
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -5989,15 +5961,16 @@ LoopVectorizationCostModel::getInstructi
// In case of STORE, the cost of ExtractElement from the vector.
// In case of LOAD, the cost of InsertElement into the returned
// vector.
- Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement :
- Instruction::InsertElement,
- VectorTy, i);
+ Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement
+ : Instruction::InsertElement,
+ VectorTy, i);
}
// The cost of the scalar loads/stores.
Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
- Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
- Alignment, AS);
+ Cost += VF *
+ TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
+ Alignment, AS);
return Cost;
}
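
[The scalarization total computed in this hunk decomposes per lane: each of
the VF lanes pays an extract from the pointer vector, an extract (for a store)
or insert (for a load) on the value vector, an address computation, and a
scalar memory op. A standalone sketch of that sum, with unit costs standing in
for the TTI queries:]

    #include <cstdio>

    unsigned scalarizedMemOpCost(unsigned VF) {
      const unsigned VecInstrCost = 1; // stand-in: extract/insert element
      const unsigned AddrCost = 1;     // stand-in: address computation
      const unsigned MemCost = 1;      // stand-in: scalar load/store
      unsigned Cost = 0;
      for (unsigned i = 0; i < VF; ++i) {
        Cost += VecInstrCost; // extract lane i of the pointer vector
        Cost += VecInstrCost; // value lane: extract (store) or insert (load)
      }
      Cost += VF * AddrCost; // per-lane address computation
      Cost += VF * MemCost;  // per-lane scalar load/store
      return Cost;
    }

    int main() { std::printf("%u\n", scalarizedMemOpCost(4)); } // prints 16
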
@@ -6006,19 +5979,18 @@ LoopVectorizationCostModel::getInstructi
assert(ConsecutiveStride == 0 &&
"Gather/Scatter are not used for consecutive stride");
return Cost +
- TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
- Legal->isMaskRequired(I), Alignment);
+ TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
+ Legal->isMaskRequired(I), Alignment);
}
// Wide load/stores.
if (Legal->isMaskRequired(I))
- Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment,
- AS);
+ Cost +=
+ TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
else
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
if (Reverse)
- Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
- VectorTy, 0);
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
return Cost;
}
case Instruction::ZExt:
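
[For the wide-access arm just above: when the access needs a mask, the masked
memory-op cost is charged in place of the plain one, and a negative stride
adds a reverse shuffle on top. A standalone sketch with stand-in unit costs:]

    #include <cstdio>

    unsigned wideAccessCost(bool MaskRequired, bool Reverse) {
      const unsigned PlainOp = 1, MaskedOp = 2, ReverseShuffle = 1; // stand-ins
      unsigned Cost = MaskRequired ? MaskedOp : PlainOp;
      if (Reverse)
        Cost += ReverseShuffle; // SK_Reverse shuffle to flip the lanes
      return Cost;
    }

    int main() {
      std::printf("%u\n", wideAccessCost(/*MaskRequired=*/true, /*Reverse=*/true));
    }
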
@@ -6051,13 +6023,13 @@ LoopVectorizationCostModel::getInstructi
Type *MinVecTy = VectorTy;
if (I->getOpcode() == Instruction::Trunc) {
SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy);
- VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF),
- MinVecTy);
+ VectorTy =
+ largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
} else if (I->getOpcode() == Instruction::ZExt ||
I->getOpcode() == Instruction::SExt) {
SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy);
- VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF),
- MinVecTy);
+ VectorTy =
+ smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
}
}
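
[The Trunc/ZExt/SExt handling above shrinks the types being costed to the
minimal legal widths that demanded-bits analysis allows: a trunc's destination
is clamped up to at least the minimal width, while an extension's destination
is clamped down to at most that width. A standalone sketch with element
bit-widths standing in for the vector types:]

    #include <algorithm>
    #include <cstdio>

    // largestIntegerVectorType / smallestIntegerVectorType, reduced to widths.
    unsigned truncDstBits(unsigned DstBits, unsigned MinBits) {
      return std::max(DstBits, MinBits); // never narrower than the minimum
    }
    unsigned extDstBits(unsigned DstBits, unsigned MinBits) {
      return std::min(DstBits, MinBits); // never wider than the minimum
    }

    int main() {
      std::printf("trunc->%u ext->%u\n", truncDstBits(8, 16), extDstBits(64, 16));
    }
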
@@ -6078,10 +6050,10 @@ LoopVectorizationCostModel::getInstructi
unsigned Cost = 0;
if (!RetTy->isVoidTy() && VF != 1) {
- unsigned InsCost = TTI.getVectorInstrCost(Instruction::InsertElement,
- VectorTy);
- unsigned ExtCost = TTI.getVectorInstrCost(Instruction::ExtractElement,
- VectorTy);
+ unsigned InsCost =
+ TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy);
+ unsigned ExtCost =
+ TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy);
// The cost of inserting the results plus extracting each one of the
// operands.
@@ -6093,7 +6065,7 @@ LoopVectorizationCostModel::getInstructi
Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
return Cost;
}
- }// end of switch.
+ } // end of switch.
}
char LoopVectorize::ID = 0;
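
[The InsCost/ExtCost pair in the previous hunk prices the packing overhead of
a scalarized operation: for VF > 1, each lane pays one insert for its result
plus one extract per operand. A standalone sketch of that overhead formula,
with unit costs standing in for the TTI vector-instruction queries:]

    #include <cstdio>

    unsigned packingOverhead(unsigned VF, unsigned NumOperands, bool HasResult) {
      const unsigned InsCost = 1, ExtCost = 1; // stand-ins for TTI queries
      if (VF == 1)
        return 0; // scalar code has nothing to pack or unpack
      unsigned PerLane = (HasResult ? InsCost : 0) + NumOperands * ExtCost;
      return VF * PerLane;
    }

    int main() {
      std::printf("%u\n", packingOverhead(4, 2, /*HasResult=*/true)); // prints 12
    }
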
@@ -6115,9 +6087,9 @@ INITIALIZE_PASS_DEPENDENCY(DemandedBitsW
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
- Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
- return new LoopVectorize(NoUnrolling, AlwaysVectorize);
- }
+Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
+ return new LoopVectorize(NoUnrolling, AlwaysVectorize);
+}
}
bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
@@ -6132,7 +6104,6 @@ bool LoopVectorizationCostModel::isConse
return false;
}
-
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
bool IfPredicateStore) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
@@ -6174,8 +6145,7 @@ void InnerLoopUnroller::scalarizeInstruc
  // Does this instruction return a value?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *UndefVec = IsVoidRetTy ? nullptr :
- UndefValue::get(Instr->getType());
+ Value *UndefVec = IsVoidRetTy ? nullptr : UndefValue::get(Instr->getType());
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
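
[The WidenMap.splat call above seeds one result slot per unroll part with an
undef placeholder (or nothing for void instructions), so later iterations can
overwrite each slot as the cloned scalars are produced. A standalone sketch of
the idea, with ints standing in for IR values:]

    #include <cstdio>
    #include <vector>

    // Stand-ins: -1 plays UndefValue, 0 plays a null Value pointer.
    std::vector<int> splat(unsigned UF, int Init) {
      return std::vector<int>(UF, Init); // one slot per unroll part
    }

    int main() {
      bool IsVoidRetTy = false;
      std::vector<int> VecResults = splat(2, IsVoidRetTy ? 0 : -1);
      std::printf("parts=%zu first=%d\n", VecResults.size(), VecResults[0]);
    }
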
@@ -6202,31 +6172,30 @@ void InnerLoopUnroller::scalarizeInstruc
}
Instruction *Cloned = Instr->clone();
- if (!IsVoidRetTy)
- Cloned->setName(Instr->getName() + ".cloned");
- // Replace the operands of the cloned instructions with extracted scalars.
- for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- Value *Op = Params[op][Part];
- Cloned->setOperand(op, Op);
- }
-
- // Place the cloned scalar in the new loop.
- Builder.Insert(Cloned);
-
- // If we just cloned a new assumption, add it to the assumption cache.
- if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
-
- // If the original scalar returns a value, we need to place it in a vector
- // so that future users will be able to use it.
- if (!IsVoidRetTy)
- VecResults[Part] = Cloned;
-
- // End if-block.
- if (IfPredicateStore)
- PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
- Cmp));
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+ // Replace the operands of the cloned instructions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op][Part];
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder.Insert(Cloned);
+
+ // If we just cloned a new assumption, add it to the assumption cache.
+ if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+
+ // If the original scalar returns a value, we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults[Part] = Cloned;
+
+ // End if-block.
+ if (IfPredicateStore)
+ PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned), Cmp));
}
}
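
[The re-indented loop body above performs per-part cloning: each clone of the
original instruction has its operands rewritten to the scalars extracted for
that unroll part before being placed in the new loop. A standalone sketch of
that rewrite step, with plain structs standing in for LLVM's Instruction and
Value types:]

    #include <cstdio>
    #include <vector>

    struct Instr {
      std::vector<int> Operands; // stand-in for Value * operands
    };

    Instr cloneForPart(const Instr &Orig,
                       const std::vector<std::vector<int>> &Params,
                       unsigned Part) {
      Instr Cloned = Orig; // Instr->clone()
      for (unsigned op = 0; op != Cloned.Operands.size(); ++op)
        Cloned.Operands[op] = Params[op][Part]; // setOperand(op, ...)
      return Cloned; // Builder.Insert(Cloned) in the real code
    }

    int main() {
      Instr Add{{0, 0}};
      std::vector<std::vector<int>> Params = {{10, 11}, {20, 21}};
      Instr C1 = cloneForPart(Add, Params, 1);
      std::printf("%d %d\n", C1.Operands[0], C1.Operands[1]); // prints 11 21
    }
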
@@ -6237,13 +6206,9 @@ void InnerLoopUnroller::vectorizeMemoryI
return scalarizeInstruction(Instr, IfPredicateStore);
}
-Value *InnerLoopUnroller::reverseVector(Value *Vec) {
- return Vec;
-}
+Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
-Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
- return V;
-}
+Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
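
[At VF == 1 there is no vector to build, so the step "vector" in the unroller
degenerates to a single scalar add of StartIdx * Step onto the base value. A
standalone sketch with plain integers standing in for IR values:]

    #include <cstdio>

    int stepScalar(int Val, int StartIdx, int Step) {
      return Val + StartIdx * Step; // one lane: just a scalar add
    }

    int main() { std::printf("%d\n", stepScalar(100, 3, 4)); } // prints 112
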