[llvm-commits] [llvm] r126040 - in /llvm/trunk: lib/Transforms/Scalar/LoopIdiomRecognize.cpp test/Transforms/LoopIdiom/basic.ll
Chris Lattner
sabre at nondot.org
Sat Feb 19 11:31:39 PST 2011
Author: lattner
Date: Sat Feb 19 13:31:39 2011
New Revision: 126040
URL: http://llvm.org/viewvc/llvm-project?rev=126040&view=rev
Log:
Implement rdar://9009151, transforming strided loop stores of
unsplatable values into memset_pattern16 when it is available
(recent darwins). This transforms lots of strided loop stores
of ints for example, like 5 in vpr:
Formed memset: call void @memset_pattern16(i8* %4, i8* getelementptr inbounds ([16 x i8]* @.memset_pattern9, i32 0, i32 0), i64 %tmp25)
from store to: {%3,+,4}<%11> at: store i32 3, i32* %scevgep, align 4, !tbaa !4
Modified:
llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
llvm/trunk/test/Transforms/LoopIdiom/basic.ll
Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=126040&r1=126039&r2=126040&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Feb 19 13:31:39 2011
@@ -39,6 +39,7 @@
#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -76,11 +77,11 @@
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
- bool processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment,
- Value *SplatValue, Instruction *TheStore,
- const SCEVAddRecExpr *Ev,
- const SCEV *BECount);
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment,
+ Value *SplatValue, Instruction *TheStore,
+ const SCEVAddRecExpr *Ev,
+ const SCEV *BECount);
bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
@@ -275,14 +276,11 @@
// validity check in mayLoopAccessLocation to be updated though.
if (Stride == 0 || StoreSize != Stride->getValue()->getValue())
return false;
-
- // If the stored value is a byte-wise value (like i32 -1), then it may be
- // turned into a memset of i8 -1, assuming that all the consecutive bytes
- // are stored. A store of i32 0x01020304 can never be turned into a memset.
- if (Value *SplatValue = isBytewiseValue(StoredVal))
- if (processLoopStoreOfSplatValue(StorePtr, StoreSize, SI->getAlignment(),
- SplatValue, SI, StoreEv, BECount))
- return true;
+
+ // See if we can optimize just this store in isolation.
+ if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount))
+ return true;
// If the stored value is a strided load in the same loop with the same stride
// this this may be transformable into a memcpy. This kicks in for stuff like
@@ -333,9 +331,9 @@
if (Stride == 0 || MSI->getLength() != Stride->getValue())
return false;
- return processLoopStoreOfSplatValue(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), MSI->getValue(),
- MSI, Ev, BECount);
+ return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
+ MSI->getAlignment(), MSI->getValue(),
+ MSI, Ev, BECount);
}
@@ -372,22 +370,97 @@
return false;
}
-/// processLoopStoreOfSplatValue - We see a strided store of a memsetable value.
-/// If we can transform this into a memset in the loop preheader, do so.
-bool LoopIdiomRecognize::
-processLoopStoreOfSplatValue(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment, Value *SplatValue,
- Instruction *TheStore,
- const SCEVAddRecExpr *Ev, const SCEV *BECount) {
- // If we're not allowed to form memset, we fail.
- if (!TLI->has(LibFunc::memset))
- return false;
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (C == 0) return 0;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size-1)))
+ return 0;
+
+ // Convert the constant to an integer type of the appropriate size so we can
+ // start hacking on it.
+ if (isa<PointerType>(V->getType()))
+ C = ConstantExpr::getPtrToInt(C, IntegerType::get(C->getContext(), Size));
+ else if (isa<VectorType>(V->getType()) || V->getType()->isFloatingPointTy())
+ C = ConstantExpr::getBitCast(C, IntegerType::get(C->getContext(), Size));
+ else if (!isa<IntegerType>(V->getType()))
+ return 0; // Unhandled type.
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // If we couldn't fold this to an integer, we fail. We don't bother to handle
+ // relocatable expressions like the address of a global yet.
+ // FIXME!
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (CI == 0) return 0;
+
+ APInt CVal = CI->getValue();
+
+ // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+ // if the top and bottom are the same.
+ if (Size > 16) return 0;
+
+ // If this is a big endian target (PPC) then we need to bswap.
+ if (TD.isBigEndian())
+ CVal = CVal.byteSwap();
+
+ // Determine what each byte of the pattern value should be.
+ char Value[16];
+ for (unsigned i = 0; i != 16; ++i) {
+ // Get the byte value we're indexing into.
+ unsigned CByte = i % Size;
+ Value[i] = (unsigned char)(CVal.getZExtValue() >> CByte);
+ }
+
+ return ConstantArray::get(V->getContext(), StringRef(Value, 16), false);
+}
+
+/// processLoopStridedStore - We see a strided store of some value. If we can
+/// transform this into a memset or memset_pattern in the loop preheader, do so.
+bool LoopIdiomRecognize::
+processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *StoredVal,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount) {
- // Verify that the stored value is loop invariant. If not, we can't promote
- // the memset.
- if (!CurLoop->isLoopInvariant(SplatValue))
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = 0;
+
+ // If we're allowed to form a memset, and the stored value would be acceptable
+ // for memset, use it.
+ if (SplatValue && TLI->has(LibFunc::memset) &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // Keep and use SplatValue.
+ PatternValue = 0;
+ } else if (TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ // It looks like we can use PatternValue!
+ SplatValue = 0;
+ } else {
+ // Otherwise, this isn't an idiom we can transform. For example, we can't
+ // do anything with a 3-byte store, for example.
return false;
+ }
+
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
@@ -415,7 +488,7 @@
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- const Type *IntPtr = TD->getIntPtrType(SplatValue->getContext());
+ const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -427,8 +500,28 @@
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
- Value *NewCall =
- Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
+ Value *NewCall;
+ if (SplatValue)
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
+ else {
+ Module *M = TheStore->getParent()->getParent()->getParent();
+ Value *MSP = M->getOrInsertFunction("memset_pattern16",
+ Builder.getVoidTy(),
+ Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(), IntPtr,
+ (void*)0);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known to be
+ // an constant array of 16-bytes. Plop the value into a mergable global.
+ GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
+ GlobalValue::InternalLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(16);
+ Value *PatternPtr = Builder.CreateConstInBoundsGEP2_32(GV, 0, 0, "pattern");
+
+ NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ }
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
<< " from store to: " << *Ev << " at: " << *TheStore << "\n");
Modified: llvm/trunk/test/Transforms/LoopIdiom/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/basic.ll?rev=126040&r1=126039&r2=126040&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/basic.ll (original)
+++ llvm/trunk/test/Transforms/LoopIdiom/basic.ll Sat Feb 19 13:31:39 2011
@@ -273,3 +273,30 @@
; CHECK-NOT: store
; CHECK: ret void
}
+
+; On darwin10 (which is the triple in this .ll file) this loop can be turned
+; into a memset_pattern call.
+; rdar://9009151
+define void @test11(i32* nocapture %P) nounwind ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+ %arrayidx = getelementptr i32* %P, i64 %indvar
+ store i32 1, i32* %arrayidx, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+; CHECK: @test11
+; CHECK-NEXT: entry:
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: memset_pattern
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+
More information about the llvm-commits
mailing list