<br><br>On Tue, Oct 30, 2012 at 9:17 AM, Benjamin Kramer <<a href="mailto:benny.kra@gmail.com">benny.kra@gmail.com</a>> wrote:<br>><br>> On 29.10.2012, at 19:10, Preston Briggs <<a href="mailto:preston.briggs@gmail.com">preston.briggs@gmail.com</a>> wrote:<br>

><br>>> I don't understand all this code, but I have a few comments inline.<br>><br>> Thanks for the review :) I'm still working out how to make good use of all the details you DA provides.<br>><br>

> One important thing about this pass (and all other LoopPasses) is that nested<div>> loops are visited inside-out, starting with the most deeply nested loop.</div><div><br></div><div>Right, makes good sense.</div><div>

<br><br>>>> Author: d0k<br>>>> Date: Sat Oct 27 09:25:44 2012<br>>>> New Revision: 166874<br>>>><br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project?rev=166874&view=rev">http://llvm.org/viewvc/llvm-project?rev=166874&view=rev</a><br>

>>> Log:<br>>>> LoopIdiom: Replace custom dependence analysis with DependenceAnalysis.<br>>><br>>> :-)<br>>><br>>>> Requires a lot less code and complexity on loop-idiom's side and the more<br>

>>> precise analysis can catch more cases, like the one I included as a test case.<br>>>> This also fixes the edge-case miscompilation from PR9481.<br>>>><br>>>> Compile time performance seems to be slightly worse, but this is mostly due<br>

>>> to an extra LCSSA run scheduled by the PassManager and should be fixed there.<br>>>><br>>>> Added:<br>>>>    llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll<br>>>>    llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll<br>

>>> Modified:<br>>>>    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>><br>>>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166874&r1=166873&r2=166874&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166874&r1=166873&r2=166874&view=diff</a><br>

>>> ==============================================================================<br>>>> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)<br>>>> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Oct 27 09:25:44 2012<br>

>>> @@ -48,6 +48,7 @@<br>>>> #include "llvm/Module.h"<br>>>> #include "llvm/ADT/Statistic.h"<br>>>> #include "llvm/Analysis/AliasAnalysis.h"<br>>>> +#include "llvm/Analysis/DependenceAnalysis.h"<br>

>>> #include "llvm/Analysis/LoopPass.h"<br>>>> #include "llvm/Analysis/ScalarEvolutionExpander.h"<br>>>> #include "llvm/Analysis/ScalarEvolutionExpressions.h"<br>>>> @@ -106,6 +107,8 @@<br>

>>>       AU.addPreserved<AliasAnalysis>();<br>>>>       AU.addRequired<ScalarEvolution>();<br>>>>       AU.addPreserved<ScalarEvolution>();<br>>>> +      AU.addRequired<DependenceAnalysis>();<br>

>>> +      AU.addPreserved<DependenceAnalysis>();<br>>>>       AU.addPreserved<DominatorTree>();<br>>>>       AU.addRequired<DominatorTree>();<br>>>>       AU.addRequired<TargetLibraryInfo>();<br>

>>> @@ -122,6 +125,7 @@<br>>>> INITIALIZE_PASS_DEPENDENCY(LCSSA)<br>>>> INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)<br>>>> INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)<br>>>> +INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)<br>

>>> INITIALIZE_AG_DEPENDENCY(AliasAnalysis)<br>>>> INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",<br>>>>                     false, false)<br>

>>> @@ -163,15 +167,6 @@<br>>>>   } while (!NowDeadInsts.empty());<br>>>> }<br>>>><br>>>> -/// deleteIfDeadInstruction - If the specified value is a dead instruction,<br>>>> -/// delete it and any recursively used instructions.<br>

>>> -static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,<br>>>> -                                    const TargetLibraryInfo *TLI) {<br>>>> -  if (Instruction *I = dyn_cast<Instruction>(V))<br>

>>> -    if (isInstructionTriviallyDead(I, TLI))<br>>>> -      deleteDeadInstruction(I, SE, TLI);<br>>>> -}<br>>>> -<br>>>> bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {<br>

>>>   CurLoop = L;<br>>>><br>>>> @@ -368,40 +363,6 @@<br>>>>                                  MSI, Ev, BECount);<br>>>> }<br>>>><br>>>> -<br>>>> -/// mayLoopAccessLocation - Return true if the specified loop might access the<br>

>>> -/// specified pointer location, which is a loop-strided access.  The 'Access'<br>>>> -/// argument specifies what the verboten forms of access are (read or write).<br>>>> -static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,<br>

>>> -                                  Loop *L, const SCEV *BECount,<br>>>> -                                  unsigned StoreSize, AliasAnalysis &AA,<br>>>> -                                  Instruction *IgnoredStore) {<br>

>>> -  // Get the location that may be stored across the loop.  Since the access is<br>>>> -  // strided positively through memory, we say that the modified location starts<br>>>> -  // at the pointer and has infinite size.<br>

>>> -  uint64_t AccessSize = AliasAnalysis::UnknownSize;<br>>>> -<br>>>> -  // If the loop iterates a fixed number of times, we can refine the access size<br>>>> -  // to be exactly the size of the memset, which is (BECount+1)*StoreSize<br>

>>> -  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))<br>>>> -    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;<br>>>> -<br>>>> -  // TODO: For this to be really effective, we have to dive into the pointer<br>

>>> -  // operand in the store.  Store to &A[i] of 100 will always return may alias<br>>>> -  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,<br>>>> -  // which will then no-alias a store to &A[100].<br>

>>> -  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);<br>>>> -<br>>>> -  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;<br>>>> -       ++BI)<br>

>>> -    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)<br>>>> -      if (&*I != IgnoredStore &&<br>>>> -          (AA.getModRefInfo(I, StoreLoc) & Access))<br>

>>> -        return true;<br>>>> -<br>>>> -  return false;<br>>>> -}<br>>>> -<br>>>> /// getMemSetPatternValue - If a strided store of the specified value is safe to<br>

>>> /// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should<br>>>> /// be passed in.  Otherwise, return null.<br>>>> @@ -474,6 +435,18 @@<br>>>>     return false;<br>

>>>   }<br>>>><br>>>> +  // Make sure the store has no dependencies (i.e. other loads and stores) in<br>>>> +  // the loop.<br>>>> +  DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();<br>

>>> +  for (Loop::block_iterator BI = CurLoop->block_begin(),<br>>>> +       BE = CurLoop->block_end(); BI != BE; ++BI)<br>>>> +    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)<br>

>>> +      if (&*I != TheStore && I->mayReadOrWriteMemory()) {<br>>>> +        OwningPtr<Dependence> D(DA.depends(TheStore, I, true));<br>>>> +        if (D)<br>>>> +          return false;<br>

>><br>>> This seems pessimistic. If the candidate loop is nested, we could<br>>> still find some idioms in the presence of dependences. We should check<br>>> to see if there's a loop-independent dependence or if there's a<br>

>> dependence at the innermost level.<br>><br>> The code was adapted from the previous custom analysis so there is<br>> a high possibility that it can be made a lot more aggressive. However,<br>> if we can turn the inner loop into a function call, the pass would've done<br>

> that when visiting the inner loop. Am I missing something?<br><br>Loops are numbered from 1 (outermost) to n (innermost).<br>Instructions not contained in a loop are said to be at level 0.<br>So if we have code like this<br>

<br><blockquote style="margin:0 0 0 40px;border:none;padding:0px"><font face="courier new, monospace"><b>while (...) {<br>   ...<br>   for (i = 0; i < n; i++)<br>      A[i] = A[i + n];<br>   ...<br>}</b></font></blockquote>

<br>there will be dependences between the various references to A<div>(in your code above, D won't be NULL). D->getDirection(1) will</div><div>be ALL (and D->isScalar(1) will be true), but D->getDirection(2)</div>

<div>should be NONE, so you ought to be able to replace the inner loop</div><div>with a call to memmove.</div><div><br><div><br><br>>>> +      }<br>>>> +<br>>>>   // The trip count of the loop and the base pointer of the addrec SCEV is<br>

>>>   // guaranteed to be loop invariant, which means that it should dominate the<br>>>>   // header.  This allows us to insert code for it in the preheader.<br>>>> @@ -484,8 +457,7 @@<br>>>>   // Okay, we have a strided store "p[i]" of a splattable value.  We can turn<br>

>>>   // this into a memset in the loop preheader now if we want.  However, this<br>>>>   // would be unsafe to do if there is anything else in the loop that may read<br>>>> -  // or write to the aliased location.  Check for any overlap by generating the<br>

>>> -  // base pointer and checking the region.<br>>>> +  // or write to the aliased location.<br>>>>   assert(DestPtr->getType()->isPointerTy()<br>>>>       && "Must be a pointer type.");<br>

>>>   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();<br>>>> @@ -494,15 +466,6 @@<br>>>>                            Preheader->getTerminator());<br>>>><br>>>><br>

>>> -  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,<br>>>> -                            CurLoop, BECount,<br>>>> -                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){<br>

>>> -    Expander.clear();<br>>>> -    // If we generated new code for the base pointer, clean up.<br>>>> -    deleteIfDeadInstruction(BasePtr, *SE, TLI);<br>>>> -    return false;<br>>>> -  }<br>

>>> -<br>>>>   // Okay, everything looks good, insert the memset.<br>>>><br>>>>   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to<br>>>> @@ -565,6 +528,33 @@<br>

>>><br>>>>   LoadInst *LI = cast<LoadInst>(SI->getValueOperand());<br>>>><br>>>> +  // Make sure the load and the store have no dependencies (i.e. other loads and<br>>>> +  // stores) in the loop. We ignore the direct dependency between SI and LI here<br>

>>> +  // and check it later.<br>>>> +  DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();<br>>>> +  for (Loop::block_iterator BI = CurLoop->block_begin(),<br>>>> +       BE = CurLoop->block_end(); BI != BE; ++BI)<br>

>>> +    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)<br>>>> +      if (&*I != SI && &*I != LI && I->mayReadOrWriteMemory()) {<br>>>> +        // First, check if there is a dependence of the store.<br>

>>> +        OwningPtr<Dependence> DS(DA.depends(SI, I, true));<br>>>> +        if (DS)<br>>>> +          return false;<br>>>> +        // If the scanned instructon may modify memory then we also have to<br>

>>> +        // check for dependencys on the load.<br>>>> +        if (I->mayWriteToMemory()) {<br>>>> +          OwningPtr<Dependence> DL(DA.depends(I, LI, true));<br>>>> +          if (DL)<br>

>>> +            return false;<br>>>> +        }<br>>>> +      }<br>>>> +<br>>>> +  // Now check the dependency between SI and LI. If there is no dependency we<br>>>> +  // can safely emit a memcpy.<br>

>>> +  OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));<br>>>> +  if (Dep)<br>>>> +    return false;<br>>>> +<br>>>>   // The trip count of the loop and the base pointer of the addrec SCEV is<br>

>>>   // guaranteed to be loop invariant, which means that it should dominate the<br>>>>   // header.  This allows us to insert code for it in the preheader.<br>>>> @@ -573,41 +563,16 @@<br>>>>   SCEVExpander Expander(*SE, "loop-idiom");<br>

>>><br>>>>   // Okay, we have a strided store "p[i]" of a loaded value.  We can turn<br>>>> -  // this into a memcpy in the loop preheader now if we want.  However, this<br>>>> -  // would be unsafe to do if there is anything else in the loop that may read<br>

>>> -  // or write the memory region we're storing to.  This includes the load that<br>>>> -  // feeds the stores.  Check for an alias by generating the base address and<br>>>> -  // checking everything.<br>

>>> +  // this into a memcpy in the loop preheader now if we want.<br>>>>   Value *StoreBasePtr =<br>>>>     Expander.expandCodeFor(StoreEv->getStart(),<br>>>>                            Builder.getInt8PtrTy(SI->getPointerAddressSpace()),<br>

>>>                            Preheader->getTerminator());<br>>>> -<br>>>> -  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,<br>>>> -                            CurLoop, BECount, StoreSize,<br>

>>> -                            getAnalysis<AliasAnalysis>(), SI)) {<br>>>> -    Expander.clear();<br>>>> -    // If we generated new code for the base pointer, clean up.<br>>>> -    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);<br>

>>> -    return false;<br>>>> -  }<br>>>> -<br>>>> -  // For a memcpy, we have to make sure that the input array is not being<br>>>> -  // mutated by the loop.<br>>>>   Value *LoadBasePtr =<br>

>>>     Expander.expandCodeFor(LoadEv->getStart(),<br>>>>                            Builder.getInt8PtrTy(LI->getPointerAddressSpace()),<br>>>>                            Preheader->getTerminator());<br>

>>><br>>>> -  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,<br>>>> -                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {<br>>>> -    Expander.clear();<br>

>>> -    // If we generated new code for the base pointer, clean up.<br>>>> -    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);<br>>>> -    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);<br>>>> -    return false;<br>

>>> -  }<br>>>> -<br>>>>   // Okay, everything is safe, we can transform this!<br>>>><br>>>><br>>>><br>>>> Added: llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll<br>

>>> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll?rev=166874&view=auto">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll?rev=166874&view=auto</a><br>

>>> ==============================================================================<br>>>> --- llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll (added)<br>>>> +++ llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll Sat Oct 27 09:25:44 2012<br>

>>> @@ -0,0 +1,49 @@<br>>>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s<br>>>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"<br>

>>> +target triple = "x86_64-apple-darwin10.0.0"<br>>>> +<br>>>> +%struct.ham = type { [2 x [2 x [2 x [16 x [8 x i32]]]]], i32, %struct.zot }<br>>>> +%struct.zot = type { i32, i16, i16, [2 x [1152 x i32]] }<br>

>>> +<br>>>> +define void @test1(%struct.ham* nocapture %arg) nounwind {<br>>>> +bb:<br>>>> +  br label %bb1<br>>>> +<br>>>> +bb1:                                              ; preds = %bb11, %bb<br>

>>> +  %tmp = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]<br>>>> +  br label %bb2<br>>>> +<br>>>> +bb2:                                              ; preds = %bb2, %bb1<br>>>> +  %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp8, %bb2 ]<br>

>>> +  %tmp4 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 1, i64 %tmp, i64 %tmp3<br>>>> +  store i32 0, i32* %tmp4, align 4<br>>>> +  %tmp5 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 0, i64 %tmp, i64 %tmp3<br>

>>> +  store i32 0, i32* %tmp5, align 4<br>>>> +  %tmp6 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 1, i64 %tmp, i64 %tmp3<br>>>> +  store i32 0, i32* %tmp6, align 4<br>

>>> +  %tmp7 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 0, i64 %tmp, i64 %tmp3<br>>>> +  store i32 0, i32* %tmp7, align 4<br>>>> +  %tmp8 = add i64 %tmp3, 1<br>>>> +  %tmp9 = trunc i64 %tmp8 to i32<br>

>>> +  %tmp10 = icmp eq i32 %tmp9, 8<br>>>> +  br i1 %tmp10, label %bb11, label %bb2<br>>>> +<br>>>> +bb11:                                             ; preds = %bb2<br>>>> +  %tmp12 = add i64 %tmp, 1<br>

>>> +  %tmp13 = trunc i64 %tmp12 to i32<br>>>> +  %tmp14 = icmp eq i32 %tmp13, 16<br>>>> +  br i1 %tmp14, label %bb15, label %bb1<br>>>> +<br>>>> +bb15:                                             ; preds = %bb11<br>

>>> +  ret void<br>>>> +<br>>>> +; CHECK: @test1<br>>>> +; CHECK: bb1:<br>>>> +; CHECK-NOT: store<br>>>> +; CHECK: call void @llvm.memset.p0i8.i64<br>>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64<br>

>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64<br>>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64<br>>>> +; CHECK-NOT: store<br>>>> +; CHECK: br<br>>>> +}<br>>>><br>

>>> Added: llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll<br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll?rev=166874&view=auto">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll?rev=166874&view=auto</a><br>

>>> ==============================================================================<br>>>> --- llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll (added)<br>>>> +++ llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll Sat Oct 27 09:25:44 2012<br>

>>> @@ -0,0 +1,53 @@<br>>>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s<br>>>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"<br>

>>> +target triple = "x86_64-apple-darwin10.0.0"<br>>>> +<br>>>> +; PR9481<br>>>> +define i32 @test1() nounwind uwtable ssp {<br>>>> +entry:<br>>>> +  %a = alloca [10 x i8], align 1<br>

>>> +  br label %for.body<br>>>> +<br>>>> +for.cond1.preheader:                              ; preds = %for.body<br>>>> +  %arrayidx5.phi.trans.insert = getelementptr inbounds [10 x i8]* %a, i64 0, i64 0<br>

>>> +  %.pre = load i8* %arrayidx5.phi.trans.insert, align 1<br>>>> +  br label %for.body3<br>>>> +<br>>>> +for.body:                                         ; preds = %for.body, %entry<br>

>>> +  %indvars.iv29 = phi i64 [ 0, %entry ], [ %indvars.iv.next30, %for.body ]<br>>>> +  call void (...)* @bar() nounwind<br>>>> +  %arrayidx = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv29<br>

>>> +  store i8 23, i8* %arrayidx, align 1<br>>>> +  %indvars.iv.next30 = add i64 %indvars.iv29, 1<br>>>> +  %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32<br>>>> +  %exitcond32 = icmp eq i32 %lftr.wideiv31, 1000000<br>

>>> +  br i1 %exitcond32, label %for.cond1.preheader, label %for.body<br>>>> +<br>>>> +for.body3:                                        ; preds = %for.body3, %for.cond1.preheader<br>>>> +  %0 = phi i8 [ %.pre, %for.cond1.preheader ], [ %add, %for.body3 ]<br>

>>> +  %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]<br>>>> +  call void (...)* @bar() nounwind<br>>>> +  %arrayidx7 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv<br>

>>> +  %1 = load i8* %arrayidx7, align 1<br>>>> +  %add = add i8 %1, %0<br>>>> +  store i8 %add, i8* %arrayidx7, align 1<br>>>> +  %indvars.iv.next = add i64 %indvars.iv, 1<br>>>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32<br>

>>> +  %exitcond = icmp eq i32 %lftr.wideiv, 1000000<br>>>> +  br i1 %exitcond, label %for.end12, label %for.body3<br>>>> +<br>>>> +for.end12:                                        ; preds = %for.body3<br>

>>> +  %arrayidx13 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 2<br>>>> +  %2 = load i8* %arrayidx13, align 1<br>>>> +  %conv14 = sext i8 %2 to i32<br>>>> +  %arrayidx15 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 6<br>

>>> +  %3 = load i8* %arrayidx15, align 1<br>>>> +  %conv16 = sext i8 %3 to i32<br>>>> +  %add17 = add nsw i32 %conv16, %conv14<br>>>> +  ret i32 %add17<br>>>> +<br>>>> +; CHECK: @test1<br>

>>> +; CHECK-NOT: @llvm.memset<br>>>> +}<br>>>> +<br>>>> +declare void @bar(...)<br>>>><br>>>><br>>>><br>>>><br>>>> ------------------------------<br>

>>><br>>>> Message: 19<br>>>> Date: Sat, 27 Oct 2012 14:25:51 -0000<br>>>> From: Benjamin Kramer <<a href="mailto:benny.kra@googlemail.com">benny.kra@googlemail.com</a>><br>>>> To: <a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>

>>> Subject: [llvm-commits] [llvm] r166875 - in /llvm/trunk:<br>>>>        lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>>        test/Transforms/LoopIdiom/basic.ll<br>>>> Message-ID: <<a href="mailto:20121027142551.C47DF2A6C065@llvm.org">20121027142551.C47DF2A6C065@llvm.org</a>><br>

>>> Content-Type: text/plain; charset="utf-8"<br>>>><br>>>> Author: d0k<br>>>> Date: Sat Oct 27 09:25:51 2012<br>>>> New Revision: 166875<br>>>><br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project?rev=166875&view=rev">http://llvm.org/viewvc/llvm-project?rev=166875&view=rev</a><br>

>>> Log:<br>>>> LoopIdiom: Recognize memmove loops.<br>>>><br>>>> This turns loops like<br>>>>  for (unsigned i = 0; i != n; ++i)<br>>>>    p[i] = p[i+1];<br>>>> into memmove, which has a highly optimized implementation in most libcs.<br>

>>><br>>>> This was really easy with the new DependenceAnalysis :)<br>>>><br>>>> Modified:<br>>>>    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>>    llvm/trunk/test/Transforms/LoopIdiom/basic.ll<br>

>>><br>>>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp<br>>>> URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166875&r1=166874&r2=166875&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166875&r1=166874&r2=166875&view=diff</a><br>

>>> ==============================================================================<br>>>> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)<br>>>> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Oct 27 09:25:51 2012<br>

>>> @@ -16,7 +16,7 @@<br>>>> // TODO List:<br>>>> //<br>>>> // Future loop memory idioms to recognize:<br>>>> -//   memcmp, memmove, strlen, etc.<br>>>> +//   memcmp, strlen, etc.<br>

>>> // Future floating point idioms to recognize in -ffast-math mode:<br>>>> //   fpowi<br>>>> // Future integer operation idioms to recognize:<br>>>> @@ -60,8 +60,9 @@<br>>>> #include "llvm/Transforms/Utils/Local.h"<br>

>>> using namespace llvm;<br>>>><br>>>> -STATISTIC(NumMemSet, "Number of memset's formed from loop stores");<br>>>> -STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");<br>

>>> +STATISTIC(NumMemSet, "Number of memsets formed from loop stores");<br>>>> +STATISTIC(NumMemCpy, "Number of memcpys formed from loop load+stores");<br>>>> +STATISTIC(NumMemMove, "Number of memmoves formed from loop load+stores");<br>

>>><br>>>> namespace {<br>>>>   class LoopIdiomRecognize : public LoopPass {<br>>>> @@ -532,6 +533,7 @@<br>>>>   // stores) in the loop. We ignore the direct dependency between SI and LI here<br>

>>>   // and check it later.<br>>>>   DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();<br>>>> +  bool isMemcpySafe = true;<br>>>>   for (Loop::block_iterator BI = CurLoop->block_begin(),<br>

>>>        BE = CurLoop->block_end(); BI != BE; ++BI)<br>>>>     for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)<br>>>> @@ -552,8 +554,14 @@<br>>>>   // Now check the dependency between SI and LI. If there is no dependency we<br>

>>>   // can safely emit a memcpy.<br>>>>   OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));<br>>>> -  if (Dep)<br>>>> -    return false;<br>>>> +  if (Dep) {<br>>>> +    // If there is a dependence but the direction is positive we can still<br>

>>> +    // safely turn this into memmove.<br>>>> +    if (Dep->getLevels() != 1 ||<br>>>> +        Dep->getDirection(1) != Dependence::DVEntry::GT)<br>>>> +      return false;<br>

>><br>>> Why the restriction to an un-nested loop? Couldn't this work for a<br>>> deeply-nested loop?<br>><br>> LI and SI are always in the same loop, so the level cannot be != 1 here, right?</div>

<div><br></div><div>The level of a dependence is the number of enclosing loops that</div></div><div>the two instructions have in common. If LI and SI are always in the</div><div>same loop, then the level is exactly the number of loops enclosing them.</div>

<div>For a more complex example, see DependenceAnalysis.h, near line 480.</div><div><br></div><div>Preston</div><div><br></div></div>