<br><br>On Tue, Oct 30, 2012 at 9:17 AM, Benjamin Kramer <<a href="mailto:benny.kra@gmail.com">benny.kra@gmail.com</a>> wrote:<br>><br>> On 29.10.2012, at 19:10, Preston Briggs <<a href="mailto:preston.briggs@gmail.com">preston.briggs@gmail.com</a>> wrote:<br>
><br>>> I don't understand all this code, but I have a few comments inline.<br>><br>> Thanks for the review :) I'm still working out how to make good use of all the details you DA provides.<br>><br>
> One important thing about this pass (and all other LoopPasses) is that nested<div>> loops are visited inside-out, starting with the most deeply nested loop.</div><div><br></div><div>Right, makes good sense.</div><div>
<br><br>>>> Author: d0k<br>>>> Date: Sat Oct 27 09:25:44 2012<br>>>> New Revision: 166874<br>>>><br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project?rev=166874&view=rev">http://llvm.org/viewvc/llvm-project?rev=166874&view=rev</a><br>
>>> Log:<br>>>> LoopIdiom: Replace custom dependence analysis with DependenceAnalysis.<br>>><br>>> :-)<br>>><br>>>> Requires a lot less code and complexity on loop-idiom's side and the more<br>
>>> precise analysis can catch more cases, like the one I included as a test case.<br>>>> This also fixes the edge-case miscompilation from PR9481.<br>>>><br>>>> Compile time performance seems to be slightly worse, but this is mostly due<br>
>>> to an extra LCSSA run scheduled by the PassManager and should be fixed there.<br>>>><br>>>> Added:<br>>>> llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll<br>>>> llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll<br>
>>> Modified:<br>>>> llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>><br>>>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166874&r1=166873&r2=166874&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166874&r1=166873&r2=166874&view=diff</a><br>
>>> ==============================================================================<br>>>> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)<br>>>> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Oct 27 09:25:44 2012<br>
>>> @@ -48,6 +48,7 @@<br>>>> #include "llvm/Module.h"<br>>>> #include "llvm/ADT/Statistic.h"<br>>>> #include "llvm/Analysis/AliasAnalysis.h"<br>>>> +#include "llvm/Analysis/DependenceAnalysis.h"<br>
>>> #include "llvm/Analysis/LoopPass.h"<br>>>> #include "llvm/Analysis/ScalarEvolutionExpander.h"<br>>>> #include "llvm/Analysis/ScalarEvolutionExpressions.h"<br>>>> @@ -106,6 +107,8 @@<br>
>>> AU.addPreserved<AliasAnalysis>();<br>>>> AU.addRequired<ScalarEvolution>();<br>>>> AU.addPreserved<ScalarEvolution>();<br>>>> + AU.addRequired<DependenceAnalysis>();<br>
>>> + AU.addPreserved<DependenceAnalysis>();<br>>>> AU.addPreserved<DominatorTree>();<br>>>> AU.addRequired<DominatorTree>();<br>>>> AU.addRequired<TargetLibraryInfo>();<br>
>>> @@ -122,6 +125,7 @@<br>>>> INITIALIZE_PASS_DEPENDENCY(LCSSA)<br>>>> INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)<br>>>> INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)<br>>>> +INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)<br>
>>> INITIALIZE_AG_DEPENDENCY(AliasAnalysis)<br>>>> INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",<br>>>> false, false)<br>
>>> @@ -163,15 +167,6 @@<br>>>> } while (!NowDeadInsts.empty());<br>>>> }<br>>>><br>>>> -/// deleteIfDeadInstruction - If the specified value is a dead instruction,<br>>>> -/// delete it and any recursively used instructions.<br>
>>> -static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,<br>>>> - const TargetLibraryInfo *TLI) {<br>>>> - if (Instruction *I = dyn_cast<Instruction>(V))<br>
>>> - if (isInstructionTriviallyDead(I, TLI))<br>>>> - deleteDeadInstruction(I, SE, TLI);<br>>>> -}<br>>>> -<br>>>> bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {<br>
>>> CurLoop = L;<br>>>><br>>>> @@ -368,40 +363,6 @@<br>>>> MSI, Ev, BECount);<br>>>> }<br>>>><br>>>> -<br>>>> -/// mayLoopAccessLocation - Return true if the specified loop might access the<br>
>>> -/// specified pointer location, which is a loop-strided access. The 'Access'<br>>>> -/// argument specifies what the verboten forms of access are (read or write).<br>>>> -static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,<br>
>>> - Loop *L, const SCEV *BECount,<br>>>> - unsigned StoreSize, AliasAnalysis &AA,<br>>>> - Instruction *IgnoredStore) {<br>
>>> - // Get the location that may be stored across the loop. Since the access is<br>>>> - // strided positively through memory, we say that the modified location starts<br>>>> - // at the pointer and has infinite size.<br>
>>> - uint64_t AccessSize = AliasAnalysis::UnknownSize;<br>>>> -<br>>>> - // If the loop iterates a fixed number of times, we can refine the access size<br>>>> - // to be exactly the size of the memset, which is (BECount+1)*StoreSize<br>
>>> - if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))<br>>>> - AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;<br>>>> -<br>>>> - // TODO: For this to be really effective, we have to dive into the pointer<br>
>>> - // operand in the store. Store to &A[i] of 100 will always return may alias<br>>>> - // with store of &A[100], we need to StoreLoc to be "A" with size of 100,<br>>>> - // which will then no-alias a store to &A[100].<br>
>>> - AliasAnalysis::Location StoreLoc(Ptr, AccessSize);<br>>>> -<br>>>> - for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;<br>>>> - ++BI)<br>
>>> - for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)<br>>>> - if (&*I != IgnoredStore &&<br>>>> - (AA.getModRefInfo(I, StoreLoc) & Access))<br>
>>> - return true;<br>>>> -<br>>>> - return false;<br>>>> -}<br>>>> -<br>>>> /// getMemSetPatternValue - If a strided store of the specified value is safe to<br>
>>> /// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should<br>>>> /// be passed in. Otherwise, return null.<br>>>> @@ -474,6 +435,18 @@<br>>>> return false;<br>
>>> }<br>>>><br>>>> + // Make sure the store has no dependencies (i.e. other loads and stores) in<br>>>> + // the loop.<br>>>> + DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();<br>
>>> + for (Loop::block_iterator BI = CurLoop->block_begin(),<br>>>> + BE = CurLoop->block_end(); BI != BE; ++BI)<br>>>> + for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)<br>
>>> + if (&*I != TheStore && I->mayReadOrWriteMemory()) {<br>>>> + OwningPtr<Dependence> D(DA.depends(TheStore, I, true));<br>>>> + if (D)<br>>>> + return false;<br>
>><br>>> This seems pessimistic. If the candidate loop is nested, we could<br>>> still find some idioms in the presence of dependences. We should check<br>>> to see if there's a loop-independent dependence or if there's a<br>
>> dependence at the innermost level.<br>><br>> The code was adapted from the previous custom analysis so there is<br>> a high possibility that it can be made a lot more aggressive. However,<br>> if we can turn the inner loop into a function call, the pass would've done<br>
> that when visiting the inner loop. Am I missing something?<br><br>Loops are numbered from 1 (outermost) to n (innermost).<br>Instructions not contained in a loop are said to be at level 0.<br>So if we have code like this<br>
<br><blockquote style="margin:0 0 0 40px;border:none;padding:0px"><font face="courier new, monospace"><b>while (...) {<br> ...<br> for (i = 0; i < n; i++)<br> A[i] = A[i + n];<br> ...<br>}</b></font></blockquote>
<br>there will be dependences between the various references to A<div>(in your code above, D won't be NULL). D->getDirection(1) will</div><div>be ALL (and D->isScalar(1) will be true), but D->getDirection(2)</div>
<div>should be NONE, so you ought to be able to replace the inner loop</div><div>with a call to memmove.</div><div><br><div><br><br>>>> + }<br>>>> +<br>>>> // The trip count of the loop and the base pointer of the addrec SCEV is<br>
>>> // guaranteed to be loop invariant, which means that it should dominate the<br>>>> // header. This allows us to insert code for it in the preheader.<br>>>> @@ -484,8 +457,7 @@<br>>>> // Okay, we have a strided store "p[i]" of a splattable value. We can turn<br>
>>> // this into a memset in the loop preheader now if we want. However, this<br>>>> // would be unsafe to do if there is anything else in the loop that may read<br>>>> - // or write to the aliased location. Check for any overlap by generating the<br>
>>> - // base pointer and checking the region.<br>>>> + // or write to the aliased location.<br>>>> assert(DestPtr->getType()->isPointerTy()<br>>>> && "Must be a pointer type.");<br>
>>> unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();<br>>>> @@ -494,15 +466,6 @@<br>>>> Preheader->getTerminator());<br>>>><br>>>><br>
>>> - if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,<br>>>> - CurLoop, BECount,<br>>>> - StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){<br>
>>> - Expander.clear();<br>>>> - // If we generated new code for the base pointer, clean up.<br>>>> - deleteIfDeadInstruction(BasePtr, *SE, TLI);<br>>>> - return false;<br>>>> - }<br>
>>> -<br>>>> // Okay, everything looks good, insert the memset.<br>>>><br>>>> // The # stored bytes is (BECount+1)*Size. Expand the trip count out to<br>>>> @@ -565,6 +528,33 @@<br>
>>><br>>>> LoadInst *LI = cast<LoadInst>(SI->getValueOperand());<br>>>><br>>>> + // Make sure the load and the store have no dependencies (i.e. other loads and<br>>>> + // stores) in the loop. We ignore the direct dependency between SI and LI here<br>
>>> + // and check it later.<br>>>> + DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();<br>>>> + for (Loop::block_iterator BI = CurLoop->block_begin(),<br>>>> + BE = CurLoop->block_end(); BI != BE; ++BI)<br>
>>> + for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)<br>>>> + if (&*I != SI && &*I != LI && I->mayReadOrWriteMemory()) {<br>>>> + // First, check if there is a dependence of the store.<br>
>>> + OwningPtr<Dependence> DS(DA.depends(SI, I, true));<br>>>> + if (DS)<br>>>> + return false;<br>>>> + // If the scanned instructon may modify memory then we also have to<br>
>>> + // check for dependencys on the load.<br>>>> + if (I->mayWriteToMemory()) {<br>>>> + OwningPtr<Dependence> DL(DA.depends(I, LI, true));<br>>>> + if (DL)<br>
>>> + return false;<br>>>> + }<br>>>> + }<br>>>> +<br>>>> + // Now check the dependency between SI and LI. If there is no dependency we<br>>>> + // can safely emit a memcpy.<br>
>>> + OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));<br>>>> + if (Dep)<br>>>> + return false;<br>>>> +<br>>>> // The trip count of the loop and the base pointer of the addrec SCEV is<br>
>>> // guaranteed to be loop invariant, which means that it should dominate the<br>>>> // header. This allows us to insert code for it in the preheader.<br>>>> @@ -573,41 +563,16 @@<br>>>> SCEVExpander Expander(*SE, "loop-idiom");<br>
>>><br>>>> // Okay, we have a strided store "p[i]" of a loaded value. We can turn<br>>>> - // this into a memcpy in the loop preheader now if we want. However, this<br>>>> - // would be unsafe to do if there is anything else in the loop that may read<br>
>>> - // or write the memory region we're storing to. This includes the load that<br>>>> - // feeds the stores. Check for an alias by generating the base address and<br>>>> - // checking everything.<br>
>>> + // this into a memcpy in the loop preheader now if we want.<br>>>> Value *StoreBasePtr =<br>>>> Expander.expandCodeFor(StoreEv->getStart(),<br>>>> Builder.getInt8PtrTy(SI->getPointerAddressSpace()),<br>
>>> Preheader->getTerminator());<br>>>> -<br>>>> - if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,<br>>>> - CurLoop, BECount, StoreSize,<br>
>>> - getAnalysis<AliasAnalysis>(), SI)) {<br>>>> - Expander.clear();<br>>>> - // If we generated new code for the base pointer, clean up.<br>>>> - deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);<br>
>>> - return false;<br>>>> - }<br>>>> -<br>>>> - // For a memcpy, we have to make sure that the input array is not being<br>>>> - // mutated by the loop.<br>>>> Value *LoadBasePtr =<br>
>>> Expander.expandCodeFor(LoadEv->getStart(),<br>>>> Builder.getInt8PtrTy(LI->getPointerAddressSpace()),<br>>>> Preheader->getTerminator());<br>
>>><br>>>> - if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,<br>>>> - StoreSize, getAnalysis<AliasAnalysis>(), SI)) {<br>>>> - Expander.clear();<br>
>>> - // If we generated new code for the base pointer, clean up.<br>>>> - deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);<br>>>> - deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);<br>>>> - return false;<br>
>>> - }<br>>>> -<br>>>> // Okay, everything is safe, we can transform this!<br>>>><br>>>><br>>>><br>>>> Added: llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll<br>
>>> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll?rev=166874&view=auto">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll?rev=166874&view=auto</a><br>
>>> ==============================================================================<br>>>> --- llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll (added)<br>>>> +++ llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll Sat Oct 27 09:25:44 2012<br>
>>> @@ -0,0 +1,49 @@<br>>>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s<br>>>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"<br>
>>> +target triple = "x86_64-apple-darwin10.0.0"<br>>>> +<br>>>> +%struct.ham = type { [2 x [2 x [2 x [16 x [8 x i32]]]]], i32, %struct.zot }<br>>>> +%struct.zot = type { i32, i16, i16, [2 x [1152 x i32]] }<br>
>>> +<br>>>> +define void @test1(%struct.ham* nocapture %arg) nounwind {<br>>>> +bb:<br>>>> + br label %bb1<br>>>> +<br>>>> +bb1: ; preds = %bb11, %bb<br>
>>> + %tmp = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]<br>>>> + br label %bb2<br>>>> +<br>>>> +bb2: ; preds = %bb2, %bb1<br>>>> + %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp8, %bb2 ]<br>
>>> + %tmp4 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 1, i64 %tmp, i64 %tmp3<br>>>> + store i32 0, i32* %tmp4, align 4<br>>>> + %tmp5 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 0, i64 %tmp, i64 %tmp3<br>
>>> + store i32 0, i32* %tmp5, align 4<br>>>> + %tmp6 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 1, i64 %tmp, i64 %tmp3<br>>>> + store i32 0, i32* %tmp6, align 4<br>
>>> + %tmp7 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 0, i64 %tmp, i64 %tmp3<br>>>> + store i32 0, i32* %tmp7, align 4<br>>>> + %tmp8 = add i64 %tmp3, 1<br>>>> + %tmp9 = trunc i64 %tmp8 to i32<br>
>>> + %tmp10 = icmp eq i32 %tmp9, 8<br>>>> + br i1 %tmp10, label %bb11, label %bb2<br>>>> +<br>>>> +bb11: ; preds = %bb2<br>>>> + %tmp12 = add i64 %tmp, 1<br>
>>> + %tmp13 = trunc i64 %tmp12 to i32<br>>>> + %tmp14 = icmp eq i32 %tmp13, 16<br>>>> + br i1 %tmp14, label %bb15, label %bb1<br>>>> +<br>>>> +bb15: ; preds = %bb11<br>
>>> + ret void<br>>>> +<br>>>> +; CHECK: @test1<br>>>> +; CHECK: bb1:<br>>>> +; CHECK-NOT: store<br>>>> +; CHECK: call void @llvm.memset.p0i8.i64<br>>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64<br>
>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64<br>>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64<br>>>> +; CHECK-NOT: store<br>>>> +; CHECK: br<br>>>> +}<br>>>><br>
>>> Added: llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll<br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll?rev=166874&view=auto">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll?rev=166874&view=auto</a><br>
>>> ==============================================================================<br>>>> --- llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll (added)<br>>>> +++ llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll Sat Oct 27 09:25:44 2012<br>
>>> @@ -0,0 +1,53 @@<br>>>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s<br>>>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"<br>
>>> +target triple = "x86_64-apple-darwin10.0.0"<br>>>> +<br>>>> +; PR9481<br>>>> +define i32 @test1() nounwind uwtable ssp {<br>>>> +entry:<br>>>> + %a = alloca [10 x i8], align 1<br>
>>> + br label %for.body<br>>>> +<br>>>> +for.cond1.preheader: ; preds = %for.body<br>>>> + %arrayidx5.phi.trans.insert = getelementptr inbounds [10 x i8]* %a, i64 0, i64 0<br>
>>> + %.pre = load i8* %arrayidx5.phi.trans.insert, align 1<br>>>> + br label %for.body3<br>>>> +<br>>>> +for.body: ; preds = %for.body, %entry<br>
>>> + %indvars.iv29 = phi i64 [ 0, %entry ], [ %indvars.iv.next30, %for.body ]<br>>>> + call void (...)* @bar() nounwind<br>>>> + %arrayidx = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv29<br>
>>> + store i8 23, i8* %arrayidx, align 1<br>>>> + %indvars.iv.next30 = add i64 %indvars.iv29, 1<br>>>> + %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32<br>>>> + %exitcond32 = icmp eq i32 %lftr.wideiv31, 1000000<br>
>>> + br i1 %exitcond32, label %for.cond1.preheader, label %for.body<br>>>> +<br>>>> +for.body3: ; preds = %for.body3, %for.cond1.preheader<br>>>> + %0 = phi i8 [ %.pre, %for.cond1.preheader ], [ %add, %for.body3 ]<br>
>>> + %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]<br>>>> + call void (...)* @bar() nounwind<br>>>> + %arrayidx7 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv<br>
>>> + %1 = load i8* %arrayidx7, align 1<br>>>> + %add = add i8 %1, %0<br>>>> + store i8 %add, i8* %arrayidx7, align 1<br>>>> + %indvars.iv.next = add i64 %indvars.iv, 1<br>>>> + %lftr.wideiv = trunc i64 %indvars.iv.next to i32<br>
>>> + %exitcond = icmp eq i32 %lftr.wideiv, 1000000<br>>>> + br i1 %exitcond, label %for.end12, label %for.body3<br>>>> +<br>>>> +for.end12: ; preds = %for.body3<br>
>>> + %arrayidx13 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 2<br>>>> + %2 = load i8* %arrayidx13, align 1<br>>>> + %conv14 = sext i8 %2 to i32<br>>>> + %arrayidx15 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 6<br>
>>> + %3 = load i8* %arrayidx15, align 1<br>>>> + %conv16 = sext i8 %3 to i32<br>>>> + %add17 = add nsw i32 %conv16, %conv14<br>>>> + ret i32 %add17<br>>>> +<br>>>> +; CHECK: @test1<br>
>>> +; CHECK-NOT: @llvm.memset<br>>>> +}<br>>>> +<br>>>> +declare void @bar(...)<br>>>><br>>>><br>>>><br>>>><br>>>> ------------------------------<br>
>>><br>>>> Message: 19<br>>>> Date: Sat, 27 Oct 2012 14:25:51 -0000<br>>>> From: Benjamin Kramer <<a href="mailto:benny.kra@googlemail.com">benny.kra@googlemail.com</a>><br>>>> To: <a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
>>> Subject: [llvm-commits] [llvm] r166875 - in /llvm/trunk:<br>>>> lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>> test/Transforms/LoopIdiom/basic.ll<br>>>> Message-ID: <<a href="mailto:20121027142551.C47DF2A6C065@llvm.org">20121027142551.C47DF2A6C065@llvm.org</a>><br>
>>> Content-Type: text/plain; charset="utf-8"<br>>>><br>>>> Author: d0k<br>>>> Date: Sat Oct 27 09:25:51 2012<br>>>> New Revision: 166875<br>>>><br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project?rev=166875&view=rev">http://llvm.org/viewvc/llvm-project?rev=166875&view=rev</a><br>
>>> Log:<br>>>> LoopIdiom: Recognize memmove loops.<br>>>><br>>>> This turns loops like<br>>>> for (unsigned i = 0; i != n; ++i)<br>>>> p[i] = p[i+1];<br>>>> into memmove, which has a highly optimized implementation in most libcs.<br>
>>><br>>>> This was really easy with the new DependenceAnalysis :)<br>>>><br>>>> Modified:<br>>>> llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>> llvm/trunk/test/Transforms/LoopIdiom/basic.ll<br>
>>><br>>>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166875&r1=166874&r2=166875&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166875&r1=166874&r2=166875&view=diff</a><br>
>>> ==============================================================================<br>>>> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)<br>>>> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Oct 27 09:25:51 2012<br>
>>> @@ -16,7 +16,7 @@<br>>>> // TODO List:<br>>>> //<br>>>> // Future loop memory idioms to recognize:<br>>>> -// memcmp, memmove, strlen, etc.<br>>>> +// memcmp, strlen, etc.<br>
>>> // Future floating point idioms to recognize in -ffast-math mode:<br>>>> // fpowi<br>>>> // Future integer operation idioms to recognize:<br>>>> @@ -60,8 +60,9 @@<br>>>> #include "llvm/Transforms/Utils/Local.h"<br>
>>> using namespace llvm;<br>>>><br>>>> -STATISTIC(NumMemSet, "Number of memset's formed from loop stores");<br>>>> -STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");<br>
>>> +STATISTIC(NumMemSet, "Number of memsets formed from loop stores");<br>>>> +STATISTIC(NumMemCpy, "Number of memcpys formed from loop load+stores");<br>>>> +STATISTIC(NumMemMove, "Number of memmoves formed from loop load+stores");<br>
>>><br>>>> namespace {<br>>>> class LoopIdiomRecognize : public LoopPass {<br>>>> @@ -532,6 +533,7 @@<br>>>> // stores) in the loop. We ignore the direct dependency between SI and LI here<br>
>>> // and check it later.<br>>>> DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();<br>>>> + bool isMemcpySafe = true;<br>>>> for (Loop::block_iterator BI = CurLoop->block_begin(),<br>
>>> BE = CurLoop->block_end(); BI != BE; ++BI)<br>>>> for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)<br>>>> @@ -552,8 +554,14 @@<br>>>> // Now check the dependency between SI and LI. If there is no dependency we<br>
>>> // can safely emit a memcpy.<br>>>> OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));<br>>>> - if (Dep)<br>>>> - return false;<br>>>> + if (Dep) {<br>>>> + // If there is a dependence but the direction is positive we can still<br>
>>> + // safely turn this into memmove.<br>>>> + if (Dep->getLevels() != 1 ||<br>>>> + Dep->getDirection(1) != Dependence::DVEntry::GT)<br>>>> + return false;<br>
>><br>>> Why the restriction to an un-nested loop? Couldn't this work for a<br>>> deeply-nested loop?<br>><br>> LI and SI are always in the same loop, so the level cannot be != 1 here, right?</div>
<div><br></div><div>The level of a dependence is the number of enclosing loops that</div></div><div>the two instructions have in common. If LI and SI are always in the</div><div>same loop, then the level is exactly the number of loops enclosing them.</div>
<div>For a more complex example, see DependenceAnalysis.h, near line 480.</div><div><br></div><div>Preston</div><div><br></div></div>