[llvm] r258777 - [LIR] Add support for structs and hand unrolled loops
Haicheng Wu via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 25 18:27:48 PST 2016
Author: haicheng
Date: Mon Jan 25 20:27:47 2016
New Revision: 258777
URL: http://llvm.org/viewvc/llvm-project?rev=258777&view=rev
Log:
[LIR] Add support for structs and hand unrolled loops
This is a recommit of r258620, which caused PR26293.
The original message:
Now LIR can turn the following code into memset:
typedef struct foo {
  int a;
  int b;
} foo_t;

void bar(foo_t *f, unsigned n) {
  for (unsigned i = 0; i < n; ++i) {
    f[i].a = 0;
    f[i].b = 0;
  }
}
void test(int *f, unsigned n) {
  for (unsigned i = 0; i < n; i += 2) {
    f[i] = 0;
    f[i+1] = 0;
  }
}
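
For intuition, both loops above collapse into a single memset over the whole range. A rough C++ sketch of the equivalent code (illustrative only; the pass actually emits the llvm.memset intrinsic, and the second sketch assumes an even n so the hand-unrolled loop writes exactly n elements):

// Illustrative sketch of what the idiom recognizer effectively produces for
// the two examples above; not the literal output of the pass.
#include <cstring>

struct foo_t { int a; int b; };

void bar_equivalent(foo_t *f, unsigned n) {
  if (n != 0)
    std::memset(f, 0, sizeof(foo_t) * n);  // covers both .a and .b of every element
}

void test_equivalent(int *f, unsigned n) {
  if (n != 0)                              // assumes n is even, as noted above
    std::memset(f, 0, sizeof(int) * n);    // replaces the two stores per iteration
}
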
Added:
    llvm/trunk/test/Transforms/LoopIdiom/struct.ll
    llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll
    llvm/trunk/test/Transforms/LoopIdiom/unroll.ll

Modified:
    llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
    llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
Modified: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h?rev=258777&r1=258776&r2=258777&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h Mon Jan 25 20:27:47 2016
@@ -659,6 +659,11 @@ const SCEV *replaceSymbolicStrideSCEV(Pr
int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap);
+/// \brief Returns true if the memory operations \p A and \p B are consecutive.
+/// This is a simple API that does not depend on the analysis pass.
+bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
+ ScalarEvolution &SE, bool CheckType = true);
+
/// \brief This analysis provides dependence information for the memory accesses
/// of a loop.
///
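
With this change isConsecutiveAccess becomes a free function in the llvm namespace rather than a BoUpSLP member, so any pass that has a DataLayout and a ScalarEvolution available can query it. A minimal, hypothetical usage sketch (the storesAreAdjacent wrapper and the SI0/SI1 names are assumptions for illustration, not part of the patch):

// Hypothetical caller of the new llvm::isConsecutiveAccess() API; this wrapper
// does not appear in the patch itself.
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool storesAreAdjacent(StoreInst *SI0, StoreInst *SI1,
                              const DataLayout &DL, ScalarEvolution &SE) {
  // CheckType=false lets stores of different element types (e.g. the i32 and
  // i16 fields of a struct) still be treated as consecutive, which is how
  // LoopIdiomRecognize calls it below.
  return isConsecutiveAccess(SI0, SI1, DL, SE, /*CheckType=*/false);
}
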
Modified: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp?rev=258777&r1=258776&r2=258777&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp Mon Jan 25 20:27:47 2016
@@ -901,6 +901,78 @@ int llvm::isStridedPtr(PredicatedScalarE
return Stride;
}
+/// Take the pointer operand from the Load/Store instruction.
+/// Returns NULL if this is not a valid Load/Store instruction.
+static Value *getPointerOperand(Value *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerOperand();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ return nullptr;
+}
+
+/// Take the address space operand from the Load/Store instruction.
+/// Returns -1 if this is not a valid Load/Store instruction.
+static unsigned getAddressSpaceOperand(Value *I) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ return L->getPointerAddressSpace();
+ if (StoreInst *S = dyn_cast<StoreInst>(I))
+ return S->getPointerAddressSpace();
+ return -1;
+}
+
+/// Returns true if the memory operations \p A and \p B are consecutive.
+bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
+ ScalarEvolution &SE, bool CheckType) {
+ Value *PtrA = getPointerOperand(A);
+ Value *PtrB = getPointerOperand(B);
+ unsigned ASA = getAddressSpaceOperand(A);
+ unsigned ASB = getAddressSpaceOperand(B);
+
+ // Check that the address spaces match and that the pointers are valid.
+ if (!PtrA || !PtrB || (ASA != ASB))
+ return false;
+
+ // Make sure that A and B are different pointers.
+ if (PtrA == PtrB)
+ return false;
+
+ // Make sure that A and B have the same type if required.
+ if(CheckType && PtrA->getType() != PtrB->getType())
+ return false;
+
+ unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
+ Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+ APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
+
+ APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+
+ // OffsetDelta = OffsetB - OffsetA;
+ const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
+ const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
+ const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
+ const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
+ const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
+ // Check if they are based on the same pointer. That makes the offsets
+ // sufficient.
+ if (PtrA == PtrB)
+ return OffsetDelta == Size;
+
+ // Compute the necessary base pointer delta to have the necessary final delta
+ // equal to the size.
+ // BaseDelta = Size - OffsetDelta;
+ const SCEV *SizeSCEV = SE.getConstant(Size);
+ const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
+
+ // Otherwise compute the distance with SCEV between the base pointers.
+ const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
+ const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
+ const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
+ return X == PtrSCEVB;
+}
+
bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
case NoDep:
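
To make the offset arithmetic above concrete: for the struct from the commit message, the pointers to f[i].a and f[i].b strip down to the same base, so only the constant offsets matter. A standalone sketch of that same-base fast path, with plain integers standing in for the APInt/SCEV machinery (illustrative only; when the stripped bases differ, e.g. f[i] vs. f[i+1] in a hand-unrolled loop, the function instead asks SCEV whether PtrB == PtrA + (Size - OffsetDelta)):

// Same-base fast path of isConsecutiveAccess(), illustrated with the
// struct foo { i32 a; i32 b; } example from the commit message.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t OffsetA = 0; // byte offset of f[i].a within f[i]
  const uint64_t OffsetB = 4; // byte offset of f[i].b within f[i]
  const uint64_t SizeA = 4;   // store size of the first store (i32)

  const uint64_t OffsetDelta = OffsetB - OffsetA;
  // With identical base pointers, the accesses are consecutive exactly when
  // the offset difference equals the size of the first access.
  assert(OffsetDelta == SizeA && "f[i].a and f[i].b are consecutive");
  return 0;
}
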
Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=258777&r1=258776&r2=258777&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Mon Jan 25 20:27:47 2016
@@ -26,22 +26,20 @@
// i64 and larger types when i64 is legal and the value has few bits set. It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
-// We should enhance the memset/memcpy recognition to handle multiple stores in
-// the loop. This would handle things like:
-// void foo(_Complex float *P)
-// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
-//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -108,7 +106,9 @@ public:
private:
typedef SmallVector<StoreInst *, 8> StoreList;
- StoreList StoreRefsForMemset;
+ typedef MapVector<Value *, StoreList> StoreListMap;
+ StoreListMap StoreRefsForMemset;
+ StoreListMap StoreRefsForMemsetPattern;
StoreList StoreRefsForMemcpy;
bool HasMemset;
bool HasMemsetPattern;
@@ -122,14 +122,18 @@ private:
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
- bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
- bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
+ bool &ForMemcpy);
+ bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
+ bool ForMemset);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
unsigned StoreAlignment, Value *StoredVal,
- Instruction *TheStore, const SCEVAddRecExpr *Ev,
- const SCEV *BECount, bool NegStride);
+ Instruction *TheStore,
+ SmallPtrSetImpl<Instruction *> &Stores,
+ const SCEVAddRecExpr *Ev, const SCEV *BECount,
+ bool NegStride);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
/// @}
@@ -305,7 +309,7 @@ static Constant *getMemSetPatternValue(V
}
bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
- bool &ForMemcpy) {
+ bool &ForMemsetPattern, bool &ForMemcpy) {
// Don't touch volatile stores.
if (!SI->isSimple())
return false;
@@ -353,7 +357,7 @@ bool LoopIdiomRecognize::isLegalStore(St
StorePtr->getType()->getPointerAddressSpace() == 0 &&
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
// It looks like we can use PatternValue!
- ForMemset = true;
+ ForMemsetPattern = true;
return true;
}
@@ -393,6 +397,7 @@ bool LoopIdiomRecognize::isLegalStore(St
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
StoreRefsForMemset.clear();
+ StoreRefsForMemsetPattern.clear();
StoreRefsForMemcpy.clear();
for (Instruction &I : *BB) {
StoreInst *SI = dyn_cast<StoreInst>(&I);
@@ -400,15 +405,22 @@ void LoopIdiomRecognize::collectStores(B
continue;
bool ForMemset = false;
+ bool ForMemsetPattern = false;
bool ForMemcpy = false;
// Make sure this is a strided store with a constant stride.
- if (!isLegalStore(SI, ForMemset, ForMemcpy))
+ if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
continue;
// Save the store locations.
- if (ForMemset)
- StoreRefsForMemset.push_back(SI);
- else if (ForMemcpy)
+ if (ForMemset) {
+ // Find the base pointer.
+ Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+ StoreRefsForMemset[Ptr].push_back(SI);
+ } else if (ForMemsetPattern) {
+ // Find the base pointer.
+ Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+ StoreRefsForMemsetPattern[Ptr].push_back(SI);
+ } else if (ForMemcpy)
StoreRefsForMemcpy.push_back(SI);
}
}
@@ -430,9 +442,14 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// Look for store instructions, which may be optimized to memset/memcpy.
collectStores(BB);
- // Look for a single store which can be optimized into a memset.
- for (auto &SI : StoreRefsForMemset)
- MadeChange |= processLoopStore(SI, BECount);
+ // Look for a single store or sets of stores with a common base, which can be
+ // optimized into a memset (memset_pattern). The latter most commonly happens
+ // with structs and handunrolled loops.
+ for (auto &SL : StoreRefsForMemset)
+ MadeChange |= processLoopStores(SL.second, BECount, true);
+
+ for (auto &SL : StoreRefsForMemsetPattern)
+ MadeChange |= processLoopStores(SL.second, BECount, false);
// Optimize the store into a memcpy, if it feeds an similarly strided load.
for (auto &SI : StoreRefsForMemcpy)
@@ -458,26 +475,144 @@ bool LoopIdiomRecognize::runOnLoopBlock(
return MadeChange;
}
-/// processLoopStore - See if this store can be promoted to a memset.
-bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
- assert(SI->isSimple() && "Expected only non-volatile stores.");
+/// processLoopStores - See if this store(s) can be promoted to a memset.
+bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
+ const SCEV *BECount,
+ bool ForMemset) {
+ // Try to find consecutive stores that can be transformed into memsets.
+ SetVector<StoreInst *> Heads, Tails;
+ SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
+
+ // Do a quadratic search on all of the given stores and find
+ // all of the pairs of stores that follow each other.
+ SmallVector<unsigned, 16> IndexQueue;
+ for (unsigned i = 0, e = SL.size(); i < e; ++i) {
+ assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
+
+ Value *FirstStoredVal = SL[i]->getValueOperand();
+ Value *FirstStorePtr = SL[i]->getPointerOperand();
+ const SCEVAddRecExpr *FirstStoreEv =
+ cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
+ unsigned FirstStride = getStoreStride(FirstStoreEv);
+ unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
+
+ // See if we can optimize just this store in isolation.
+ if (FirstStride == FirstStoreSize || FirstStride == -FirstStoreSize) {
+ Heads.insert(SL[i]);
+ continue;
+ }
- Value *StoredVal = SI->getValueOperand();
- Value *StorePtr = SI->getPointerOperand();
+ Value *FirstSplatValue = nullptr;
+ Constant *FirstPatternValue = nullptr;
- // Check to see if the stride matches the size of the store. If so, then we
- // know that every byte is touched in the loop.
- const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- unsigned Stride = getStoreStride(StoreEv);
- unsigned StoreSize = getStoreSizeInBytes(SI, DL);
- if (StoreSize != Stride && StoreSize != -Stride)
- return false;
+ if (ForMemset)
+ FirstSplatValue = isBytewiseValue(FirstStoredVal);
+ else
+ FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
+
+ assert((FirstSplatValue || FirstPatternValue) &&
+ "Expected either splat value or pattern value.");
+
+ IndexQueue.clear();
+ // If a store has multiple consecutive store candidates, search Stores
+ // array according to the sequence: from i+1 to e, then from i-1 to 0.
+ // This is because usually pairing with immediate succeeding or preceding
+ // candidate create the best chance to find memset opportunity.
+ unsigned j = 0;
+ for (j = i + 1; j < e; ++j)
+ IndexQueue.push_back(j);
+ for (j = i; j > 0; --j)
+ IndexQueue.push_back(j - 1);
+
+ for (auto &k : IndexQueue) {
+ assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
+ Value *SecondStorePtr = SL[k]->getPointerOperand();
+ const SCEVAddRecExpr *SecondStoreEv =
+ cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
+ unsigned SecondStride = getStoreStride(SecondStoreEv);
- bool NegStride = StoreSize == -Stride;
+ if (FirstStride != SecondStride)
+ continue;
+
+ Value *SecondStoredVal = SL[k]->getValueOperand();
+ Value *SecondSplatValue = nullptr;
+ Constant *SecondPatternValue = nullptr;
+
+ if (ForMemset)
+ SecondSplatValue = isBytewiseValue(SecondStoredVal);
+ else
+ SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
+
+ assert((SecondSplatValue || SecondPatternValue) &&
+ "Expected either splat value or pattern value.");
+
+ if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
+ if (ForMemset) {
+ if (FirstSplatValue != SecondSplatValue)
+ continue;
+ } else {
+ if (FirstPatternValue != SecondPatternValue)
+ continue;
+ }
+ Tails.insert(SL[k]);
+ Heads.insert(SL[i]);
+ ConsecutiveChain[SL[i]] = SL[k];
+ break;
+ }
+ }
+ }
+
+ // We may run into multiple chains that merge into a single chain. We mark the
+ // stores that we transformed so that we don't visit the same store twice.
+ SmallPtrSet<Value *, 16> TransformedStores;
+ bool Changed = false;
+
+ // For stores that start but don't end a link in the chain:
+ for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
+ it != e; ++it) {
+ if (Tails.count(*it))
+ continue;
+
+ // We found a store instr that starts a chain. Now follow the chain and try
+ // to transform it.
+ SmallPtrSet<Instruction *, 8> AdjacentStores;
+ StoreInst *I = *it;
+
+ StoreInst *HeadStore = I;
+ unsigned StoreSize = 0;
+
+ // Collect the chain into a list.
+ while (Tails.count(I) || Heads.count(I)) {
+ if (TransformedStores.count(I))
+ break;
+ AdjacentStores.insert(I);
+
+ StoreSize += getStoreSizeInBytes(I, DL);
+ // Move to the next value in the chain.
+ I = ConsecutiveChain[I];
+ }
+
+ Value *StoredVal = HeadStore->getValueOperand();
+ Value *StorePtr = HeadStore->getPointerOperand();
+ const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getStoreStride(StoreEv);
+
+ // Check to see if the stride matches the size of the stores. If so, then
+ // we know that every byte is touched in the loop.
+ if (StoreSize != Stride && StoreSize != -Stride)
+ continue;
+
+ bool NegStride = StoreSize == -Stride;
+
+ if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
+ StoredVal, HeadStore, AdjacentStores, StoreEv,
+ BECount, NegStride)) {
+ TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
+ Changed = true;
+ }
+ }
- // See if we can optimize just this store in isolation.
- return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
- StoredVal, SI, StoreEv, BECount, NegStride);
+ return Changed;
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
@@ -520,18 +655,21 @@ bool LoopIdiomRecognize::processLoopMemS
if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
return false;
+ SmallPtrSet<Instruction *, 1> MSIs;
+ MSIs.insert(MSI);
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), SplatValue, MSI, Ev,
+ MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
BECount, /*NegStride=*/false);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
-static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
- const SCEV *BECount, unsigned StoreSize,
- AliasAnalysis &AA,
- Instruction *IgnoredStore) {
+static bool
+mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+ const SCEV *BECount, unsigned StoreSize,
+ AliasAnalysis &AA,
+ SmallPtrSetImpl<Instruction *> &IgnoredStores) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
@@ -551,7 +689,8 @@ static bool mayLoopAccessLocation(Value
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
- if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
+ if (IgnoredStores.count(&*I) == 0 &&
+ (AA.getModRefInfo(&*I, StoreLoc) & Access))
return true;
return false;
@@ -574,7 +713,8 @@ static const SCEV *getStartForNegStride(
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
- Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ Value *StoredVal, Instruction *TheStore,
+ SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride) {
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
@@ -609,7 +749,7 @@ bool LoopIdiomRecognize::processLoopStri
Value *BasePtr =
Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
- *AA, TheStore)) {
+ *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@@ -662,7 +802,8 @@ bool LoopIdiomRecognize::processLoopStri
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(TheStore, TLI);
+ for (auto *I : Stores)
+ deleteDeadInstruction(I, TLI);
++NumMemSet;
return true;
}
@@ -714,8 +855,10 @@ bool LoopIdiomRecognize::processLoopStor
Value *StoreBasePtr = Expander.expandCodeFor(
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+ SmallPtrSet<Instruction *, 1> Stores;
+ Stores.insert(SI);
if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
- StoreSize, *AA, SI)) {
+ StoreSize, *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
@@ -735,7 +878,7 @@ bool LoopIdiomRecognize::processLoopStor
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
- *AA, SI)) {
+ *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
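
The Heads/Tails/ConsecutiveChain bookkeeping in processLoopStores mirrors the store-chaining logic SLPVectorizer already used for vectorizing store sequences. A simplified, self-contained sketch of how those sets combine pairwise-consecutive stores into one chain, with integers standing in for StoreInst pointers (explanatory only, not code from the patch):

#include <iostream>
#include <map>
#include <set>
#include <vector>

int main() {
  // Pretend the quadratic search found stores 0, 1, 2 pairwise consecutive
  // (0 -> 1 -> 2); this mimics the Heads/Tails/ConsecutiveChain state above.
  std::map<int, int> ConsecutiveChain = {{0, 1}, {1, 2}};
  std::set<int> Heads = {0, 1};
  std::set<int> Tails = {1, 2};

  for (int Head : Heads) {
    // A store that also ends a link sits in the middle of some chain; skip it.
    if (Tails.count(Head))
      continue;

    // Follow the chain from its head, collecting every adjacent store.
    std::vector<int> Chain;
    int I = Head;
    while (Heads.count(I) || Tails.count(I)) {
      Chain.push_back(I);
      auto It = ConsecutiveChain.find(I);
      if (It == ConsecutiveChain.end())
        break;
      I = It->second;
    }
    // The real pass sums getStoreSizeInBytes() over the chain and only forms
    // a memset when that total matches the loop stride.
    std::cout << "chain starting at store " << Head << " has " << Chain.size()
              << " stores\n"; // prints: chain starting at store 0 has 3 stores
  }
  return 0;
}
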
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=258777&r1=258776&r2=258777&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Mon Jan 25 20:27:47 2016
@@ -27,6 +27,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -404,9 +405,6 @@ public:
MinBWs.clear();
}
- /// \returns true if the memory operations A and B are consecutive.
- bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
-
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
@@ -445,14 +443,6 @@ private:
/// vectorized, or NULL. They may happen in cycles.
Value *alreadyVectorized(ArrayRef<Value *> VL) const;
- /// \brief Take the pointer operand from the Load/Store instruction.
- /// \returns NULL if this is not a valid Load/Store instruction.
- static Value *getPointerOperand(Value *I);
-
- /// \brief Take the address space operand from the Load/Store instruction.
- /// \returns -1 if this is not a valid Load/Store instruction.
- static unsigned getAddressSpaceOperand(Value *I);
-
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
int getGatherCost(Type *Ty);
@@ -1204,8 +1194,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
return;
}
- if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
- if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
+ if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL, *SE)) {
++NumLoadsWantToChangeOrder;
}
BS.cancelScheduling(VL);
@@ -1377,7 +1367,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
const DataLayout &DL = F->getParent()->getDataLayout();
// Check if the stores are consecutive or of we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
@@ -1866,63 +1856,6 @@ int BoUpSLP::getGatherCost(ArrayRef<Valu
return getGatherCost(VecTy);
}
-Value *BoUpSLP::getPointerOperand(Value *I) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->getPointerOperand();
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->getPointerOperand();
- return nullptr;
-}
-
-unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
- if (LoadInst *L = dyn_cast<LoadInst>(I))
- return L->getPointerAddressSpace();
- if (StoreInst *S = dyn_cast<StoreInst>(I))
- return S->getPointerAddressSpace();
- return -1;
-}
-
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
- Value *PtrA = getPointerOperand(A);
- Value *PtrB = getPointerOperand(B);
- unsigned ASA = getAddressSpaceOperand(A);
- unsigned ASB = getAddressSpaceOperand(B);
-
- // Check that the address spaces match and that the pointers are valid.
- if (!PtrA || !PtrB || (ASA != ASB))
- return false;
-
- // Make sure that A and B are different pointers of the same type.
- if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
- return false;
-
- unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
- Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
-
- APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
- PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
- PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
-
- APInt OffsetDelta = OffsetB - OffsetA;
-
- // Check if they are based on the same pointer. That makes the offsets
- // sufficient.
- if (PtrA == PtrB)
- return OffsetDelta == Size;
-
- // Compute the necessary base pointer delta to have the necessary final delta
- // equal to the size.
- APInt BaseDelta = Size - OffsetDelta;
-
- // Otherwise compute the distance with SCEV between the base pointers.
- const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
- const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
- const SCEV *C = SE->getConstant(BaseDelta);
- const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
- return X == PtrSCEVB;
-}
-
// Reorder commutative operations in alternate shuffle if the resulting vectors
// are consecutive loads. This would allow us to vectorize the tree.
// If we have something like-
@@ -1950,10 +1883,10 @@ void BoUpSLP::reorderAltShuffleOperands(
if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
Instruction *VL1 = cast<Instruction>(VL[j]);
Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
std::swap(Left[j], Right[j]);
continue;
- } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -1964,10 +1897,10 @@ void BoUpSLP::reorderAltShuffleOperands(
if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
Instruction *VL1 = cast<Instruction>(VL[j]);
Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
std::swap(Left[j], Right[j]);
continue;
- } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -2117,7 +2050,7 @@ void BoUpSLP::reorderInputsAccordingToOp
for (unsigned j = 0; j < VL.size() - 1; ++j) {
if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
- if (isConsecutiveAccess(L, L1, DL)) {
+ if (isConsecutiveAccess(L, L1, DL, *SE)) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -2125,7 +2058,7 @@ void BoUpSLP::reorderInputsAccordingToOp
}
if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
- if (isConsecutiveAccess(L, L1, DL)) {
+ if (isConsecutiveAccess(L, L1, DL, *SE)) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -3674,7 +3607,7 @@ bool SLPVectorizer::vectorizeStores(Arra
IndexQueue.push_back(j - 1);
for (auto &k : IndexQueue) {
- if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
+ if (isConsecutiveAccess(Stores[i], Stores[k], DL, *SE)) {
Tails.insert(Stores[k]);
Heads.insert(Stores[i]);
ConsecutiveChain[Stores[i]] = Stores[k];
Added: llvm/trunk/test/Transforms/LoopIdiom/struct.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/struct.ll?rev=258777&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/struct.ll (added)
+++ llvm/trunk/test/Transforms/LoopIdiom/struct.ll Mon Jan 25 20:27:47 2016
@@ -0,0 +1,221 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.foo = type { i32, i32 }
+%struct.foo1 = type { i32, i32, i32 }
+%struct.foo2 = type { i32, i16, i16 }
+
+;void bar1(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar1(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar2(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].b = 0;
+; f[i].a = 0;
+; }
+;}
+define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar2(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar3(foo_t *f, unsigned n) {
+; for (unsigned i = n; i > 0; --i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %n to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %1 = trunc i64 %indvars.iv to i32
+ %dec = add i32 %1, -1
+ %cmp = icmp eq i32 %dec, 0
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar3(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar4(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 1;
+; }
+;}
+define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar4(
+; CHECK-NOT: call void @llvm.memset
+}
+
+;void bar5(foo1_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar5(
+; CHECK-NOT: call void @llvm.memset
+}
+
+;void bar6(foo2_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; f[i].c = 0;
+; }
+;}
+define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 1
+ store i16 0, i16* %b, align 4
+ %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 2
+ store i16 0, i16* %c, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar6(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
Added: llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll?rev=258777&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll (added)
+++ llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll Mon Jan 25 20:27:47 2016
@@ -0,0 +1,186 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.foo = type { i32, i32 }
+%struct.foo1 = type { i32, i32, i32 }
+
+;void bar1(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 2;
+; f[i].b = 2;
+; }
+;}
+define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar1(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar2(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].b = 2;
+; f[i].a = 2;
+; }
+;}
+define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar2(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar3(foo_t *f, unsigned n) {
+; for (unsigned i = n; i > 0; --i) {
+; f[i].a = 2;
+; f[i].b = 2;
+; }
+;}
+define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %n to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %1 = trunc i64 %indvars.iv to i32
+ %dec = add i32 %1, -1
+ %cmp = icmp eq i32 %dec, 0
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar3(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar4(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 1;
+; }
+;}
+define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar4(
+; CHECK-NOT: call void @memset_pattern16
+}
+
+;void bar5(foo1_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 1;
+; f[i].b = 1;
+; }
+;}
+define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+ store i32 1, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar5(
+; CHECK-NOT: call void @memset_pattern16
+}
Added: llvm/trunk/test/Transforms/LoopIdiom/unroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/unroll.ll?rev=258777&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/unroll.ll (added)
+++ llvm/trunk/test/Transforms/LoopIdiom/unroll.ll Mon Jan 25 20:27:47 2016
@@ -0,0 +1,80 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+;void test(int *f, unsigned n) {
+; for (unsigned i = 0; i < 2 * n; i += 2) {
+; f[i] = 0;
+; f[i+1] = 0;
+; }
+;}
+define void @test(i32* %f, i32 %n) nounwind ssp {
+entry:
+ %mul = shl i32 %n, 1
+ %cmp1 = icmp eq i32 %mul, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %mul to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 0, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
+ store i32 0, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, %0
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @test(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void test_pattern(int *f, unsigned n) {
+; for (unsigned i = 0; i < 2 * n; i += 2) {
+; f[i] = 2;
+; f[i+1] = 2;
+; }
+;}
+define void @test_pattern(i32* %f, i32 %n) nounwind ssp {
+entry:
+ %mul = shl i32 %n, 1
+ %cmp1 = icmp eq i32 %mul, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %mul to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 2, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
+ store i32 2, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, %0
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @test_pattern(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}