[llvm-commits] [llvm] r166874 - in /llvm/trunk: lib/Transforms/Scalar/LoopIdiomRecognize.cpp test/Transforms/LoopIdiom/multi-dimensional.ll test/Transforms/LoopIdiom/sideeffect.ll

Tue Oct 30 10:40:19 PDT 2012

On Tue, Oct 30, 2012 at 9:17 AM, Benjamin Kramer <benny.kra at gmail.com>
wrote:
>
> On 29.10.2012, at 19:10, Preston Briggs <preston.briggs at gmail.com> wrote:
>
>> I don't understand all this code, but I have a few comments inline.
>
> Thanks for the review :) I'm still working out how to make good use of
all the details you DA provides.
>
> One important thing about this pass (and all other LoopPasses) is that
nested
> loops are visited inside-out, starting with the most deeply nested loop.

Right, makes good sense.

>>> Author: d0k
>>> Date: Sat Oct 27 09:25:44 2012
>>> New Revision: 166874
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=166874&view=rev
>>> Log:
>>> LoopIdiom: Replace custom dependence analysis with DependenceAnalysis.
>>
>> :-)
>>
>>> Requires a lot less code and complexity on loop-idiom's side and the
more
>>> precise analysis can catch more cases, like the one I included as a
test case.
>>> This also fixes the edge-case miscompilation from PR9481.
>>>
>>> Compile time performance seems to be slightly worse, but this is mostly
due
>>> to an extra LCSSA run scheduled by the PassManager and should be fixed
there.
>>>
>>> Added:
>>>    llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll
>>>    llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll
>>> Modified:
>>>    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>>>
>>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>>> URL:
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166874&r1=166873&r2=166874&view=diff
>>>
==============================================================================
>>> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
>>> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Oct 27
09:25:44 2012
>>> @@ -48,6 +48,7 @@
>>> #include "llvm/Module.h"
>>> #include "llvm/ADT/Statistic.h"
>>> #include "llvm/Analysis/AliasAnalysis.h"
>>> +#include "llvm/Analysis/DependenceAnalysis.h"
>>> #include "llvm/Analysis/LoopPass.h"
>>> #include "llvm/Analysis/ScalarEvolutionExpander.h"
>>> #include "llvm/Analysis/ScalarEvolutionExpressions.h"
>>> @@ -106,6 +107,8 @@
>>>       AU.addPreserved<AliasAnalysis>();
>>>       AU.addRequired<ScalarEvolution>();
>>>       AU.addPreserved<ScalarEvolution>();
>>> +      AU.addRequired<DependenceAnalysis>();
>>> +      AU.addPreserved<DependenceAnalysis>();
>>>       AU.addPreserved<DominatorTree>();
>>>       AU.addRequired<DominatorTree>();
>>>       AU.addRequired<TargetLibraryInfo>();
>>> @@ -122,6 +125,7 @@
>>> INITIALIZE_PASS_DEPENDENCY(LCSSA)
>>> INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
>>> INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
>>> +INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
>>> INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
>>> INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop
idioms",
>>>                     false, false)
>>> @@ -163,15 +167,6 @@
>>>   } while (!NowDeadInsts.empty());
>>> }
>>>
>>> -/// deleteIfDeadInstruction - If the specified value is a dead
instruction,
>>> -/// delete it and any recursively used instructions.
>>> -static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
>>> -                                    const TargetLibraryInfo *TLI) {
>>> -  if (Instruction *I = dyn_cast<Instruction>(V))
>>> -    if (isInstructionTriviallyDead(I, TLI))
>>> -      deleteDeadInstruction(I, SE, TLI);
>>> -}
>>> -
>>> bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
>>>   CurLoop = L;
>>>
>>> @@ -368,40 +363,6 @@
>>>                                  MSI, Ev, BECount);
>>> }
>>>
>>> -
>>> -/// mayLoopAccessLocation - Return true if the specified loop might
access the
>>> -/// specified pointer location, which is a loop-strided access.  The
'Access'
>>> -/// argument specifies what the verboten forms of access are (read or
write).
>>> -static bool mayLoopAccessLocation(Value
*Ptr,AliasAnalysis::ModRefResult Access,
>>> -                                  Loop *L, const SCEV *BECount,
>>> -                                  unsigned StoreSize, AliasAnalysis
&AA,
>>> -                                  Instruction *IgnoredStore) {
>>> -  // Get the location that may be stored across the loop.  Since the
access is
>>> -  // strided positively through memory, we say that the modified
location starts
>>> -  // at the pointer and has infinite size.
>>> -  uint64_t AccessSize = AliasAnalysis::UnknownSize;
>>> -
>>> -  // If the loop iterates a fixed number of times, we can refine the
access size
>>> -  // to be exactly the size of the memset, which is
(BECount+1)*StoreSize
>>> -  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
>>> -    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
>>> -
>>> -  // TODO: For this to be really effective, we have to dive into the
pointer
>>> -  // operand in the store.  Store to &A[i] of 100 will always return
may alias
>>> -  // with store of &A[100], we need to StoreLoc to be "A" with size of
100,
>>> -  // which will then no-alias a store to &A[100].
>>> -  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
>>> -
>>> -  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end();
BI != E;
>>> -       ++BI)
>>> -    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I
!= E; ++I)
>>> -      if (&*I != IgnoredStore &&
>>> -          (AA.getModRefInfo(I, StoreLoc) & Access))
>>> -        return true;
>>> -
>>> -  return false;
>>> -}
>>> -
>>> /// getMemSetPatternValue - If a strided store of the specified value
is safe to
>>> /// turn into a memset_pattern16, return a ConstantArray of 16 bytes
that should
>>> /// be passed in.  Otherwise, return null.
>>> @@ -474,6 +435,18 @@
>>>     return false;
>>>   }
>>>
>>> +  // Make sure the store has no dependencies (i.e. other loads and
stores) in
>>> +  // the loop.
>>> +  DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();
>>> +  for (Loop::block_iterator BI = CurLoop->block_begin(),
>>> +       BE = CurLoop->block_end(); BI != BE; ++BI)
>>> +    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I
!= E; ++I)
>>> +      if (&*I != TheStore && I->mayReadOrWriteMemory()) {
>>> +        OwningPtr<Dependence> D(DA.depends(TheStore, I, true));
>>> +        if (D)
>>> +          return false;
>>
>> This seems pessimistic. If the candidate loop is nested, we could
>> still find some idioms in the presence of dependences. We should check
>> to see if there's a loop-independent dependence or if there's a
>> dependence at the innermost level.
>
> The code was adapted from the previous custom analysis so there is
> a high possibility that it can be made a lot more aggressive. However,
> if we can turn the inner loop into a function call, the pass would've done
> that when visiting the inner loop. Am I missing something?

Loops are numbered from 1 (outermost) to n (innermost).
Instructions not contained in a loop are said to be at level 0.
So if we have code like this

*while (...) {
   ...
   for (i = 0; i < n; i++)
      A[i] = A[i + n];
   ...
}*

there will be dependences between the various references to A
(in your code above, D won't be NULL). D->getDirection(1) will
be ALL (and D->isScalar(1) will be true), but D->getDirection(2)
should be NONE, so you ought to be able to replace the inner loop
with a call to memmove.

>>> +      }
>>> +
>>>   // The trip count of the loop and the base pointer of the addrec SCEV
is
>>>   // guaranteed to be loop invariant, which means that it should
dominate the
>>>   // header.  This allows us to insert code for it in the preheader.
>>> @@ -484,8 +457,7 @@
>>>   // Okay, we have a strided store "p[i]" of a splattable value.  We
can turn
>>>   // this into a memset in the loop preheader now if we want.  However,
this
>>>   // would be unsafe to do if there is anything else in the loop that
may read
>>> -  // or write to the aliased location.  Check for any overlap by
generating the
>>> -  // base pointer and checking the region.
>>> +  // or write to the aliased location.
>>>   assert(DestPtr->getType()->isPointerTy()
>>>       && "Must be a pointer type.");
>>>   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
>>> @@ -494,15 +466,6 @@
>>>                            Preheader->getTerminator());
>>>
>>>
>>> -  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
>>> -                            CurLoop, BECount,
>>> -                            StoreSize, getAnalysis<AliasAnalysis>(),
TheStore)){
>>> -    Expander.clear();
>>> -    // If we generated new code for the base pointer, clean up.
>>> -    deleteIfDeadInstruction(BasePtr, *SE, TLI);
>>> -    return false;
>>> -  }
>>> -
>>>   // Okay, everything looks good, insert the memset.
>>>
>>>   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out
to
>>> @@ -565,6 +528,33 @@
>>>
>>>   LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
>>>
>>> +  // Make sure the load and the store have no dependencies (i.e. other
loads and
>>> +  // stores) in the loop. We ignore the direct dependency between SI
and LI here
>>> +  // and check it later.
>>> +  DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();
>>> +  for (Loop::block_iterator BI = CurLoop->block_begin(),
>>> +       BE = CurLoop->block_end(); BI != BE; ++BI)
>>> +    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I
!= E; ++I)
>>> +      if (&*I != SI && &*I != LI && I->mayReadOrWriteMemory()) {
>>> +        // First, check if there is a dependence of the store.
>>> +        OwningPtr<Dependence> DS(DA.depends(SI, I, true));
>>> +        if (DS)
>>> +          return false;
>>> +        // If the scanned instructon may modify memory then we also
have to
>>> +        // check for dependencys on the load.
>>> +        if (I->mayWriteToMemory()) {
>>> +          OwningPtr<Dependence> DL(DA.depends(I, LI, true));
>>> +          if (DL)
>>> +            return false;
>>> +        }
>>> +      }
>>> +
>>> +  // Now check the dependency between SI and LI. If there is no
dependency we
>>> +  // can safely emit a memcpy.
>>> +  OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));
>>> +  if (Dep)
>>> +    return false;
>>> +
>>>   // The trip count of the loop and the base pointer of the addrec SCEV
is
>>>   // guaranteed to be loop invariant, which means that it should
dominate the
>>>   // header.  This allows us to insert code for it in the preheader.
>>> @@ -573,41 +563,16 @@
>>>   SCEVExpander Expander(*SE, "loop-idiom");
>>>
>>>   // Okay, we have a strided store "p[i]" of a loaded value.  We can
turn
>>> -  // this into a memcpy in the loop preheader now if we want.
 However, this
>>> -  // would be unsafe to do if there is anything else in the loop that
may read
>>> -  // or write the memory region we're storing to.  This includes the
load that
>>> -  // feeds the stores.  Check for an alias by generating the base
address and
>>> -  // checking everything.
>>> +  // this into a memcpy in the loop preheader now if we want.
>>>   Value *StoreBasePtr =
>>>     Expander.expandCodeFor(StoreEv->getStart(),
>>>
 Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
>>>                            Preheader->getTerminator());
>>> -
>>> -  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
>>> -                            CurLoop, BECount, StoreSize,
>>> -                            getAnalysis<AliasAnalysis>(), SI)) {
>>> -    Expander.clear();
>>> -    // If we generated new code for the base pointer, clean up.
>>> -    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
>>> -    return false;
>>> -  }
>>> -
>>> -  // For a memcpy, we have to make sure that the input array is not
being
>>> -  // mutated by the loop.
>>>   Value *LoadBasePtr =
>>>     Expander.expandCodeFor(LoadEv->getStart(),
>>>
 Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
>>>                            Preheader->getTerminator());
>>>
>>> -  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop,
BECount,
>>> -                            StoreSize, getAnalysis<AliasAnalysis>(),
SI)) {
>>> -    Expander.clear();
>>> -    // If we generated new code for the base pointer, clean up.
>>> -    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
>>> -    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
>>> -    return false;
>>> -  }
>>> -
>>>   // Okay, everything is safe, we can transform this!
>>>
>>>
>>>
>>> Added: llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll
>>> URL:
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll?rev=166874&view=auto
>>>
==============================================================================
>>> --- llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll (added)
>>> +++ llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll Sat Oct
27 09:25:44 2012
>>> @@ -0,0 +1,49 @@
>>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
>>> +target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
>>> +target triple = "x86_64-apple-darwin10.0.0"
>>> +
>>> +%struct.ham = type { [2 x [2 x [2 x [16 x [8 x i32]]]]], i32,
%struct.zot }
>>> +%struct.zot = type { i32, i16, i16, [2 x [1152 x i32]] }
>>> +
>>> +define void @test1(%struct.ham* nocapture %arg) nounwind {
>>> +bb:
>>> +  br label %bb1
>>> +
>>> +bb1:                                              ; preds = %bb11, %bb
>>> +  %tmp = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
>>> +  br label %bb2
>>> +
>>> +bb2:                                              ; preds = %bb2, %bb1
>>> +  %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp8, %bb2 ]
>>> +  %tmp4 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64
0, i64 1, i64 1, i64 %tmp, i64 %tmp3
>>> +  store i32 0, i32* %tmp4, align 4
>>> +  %tmp5 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64
0, i64 1, i64 0, i64 %tmp, i64 %tmp3
>>> +  store i32 0, i32* %tmp5, align 4
>>> +  %tmp6 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64
0, i64 0, i64 1, i64 %tmp, i64 %tmp3
>>> +  store i32 0, i32* %tmp6, align 4
>>> +  %tmp7 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64
0, i64 0, i64 0, i64 %tmp, i64 %tmp3
>>> +  store i32 0, i32* %tmp7, align 4
>>> +  %tmp8 = add i64 %tmp3, 1
>>> +  %tmp9 = trunc i64 %tmp8 to i32
>>> +  %tmp10 = icmp eq i32 %tmp9, 8
>>> +  br i1 %tmp10, label %bb11, label %bb2
>>> +
>>> +bb11:                                             ; preds = %bb2
>>> +  %tmp12 = add i64 %tmp, 1
>>> +  %tmp13 = trunc i64 %tmp12 to i32
>>> +  %tmp14 = icmp eq i32 %tmp13, 16
>>> +  br i1 %tmp14, label %bb15, label %bb1
>>> +
>>> +bb15:                                             ; preds = %bb11
>>> +  ret void
>>> +
>>> +; CHECK: @test1
>>> +; CHECK: bb1:
>>> +; CHECK-NOT: store
>>> +; CHECK: call void @llvm.memset.p0i8.i64
>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64
>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64
>>> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64
>>> +; CHECK-NOT: store
>>> +; CHECK: br
>>> +}
>>>
>>> Added: llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll
>>> URL:
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll?rev=166874&view=auto
>>>
==============================================================================
>>> --- llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll (added)
>>> +++ llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll Sat Oct 27
09:25:44 2012
>>> @@ -0,0 +1,53 @@
>>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
>>> +target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
>>> +target triple = "x86_64-apple-darwin10.0.0"
>>> +
>>> +; PR9481
>>> +define i32 @test1() nounwind uwtable ssp {
>>> +entry:
>>> +  %a = alloca [10 x i8], align 1
>>> +  br label %for.body
>>> +
>>> +for.cond1.preheader:                              ; preds = %for.body
>>> +  %arrayidx5.phi.trans.insert = getelementptr inbounds [10 x i8]* %a,
i64 0, i64 0
>>> +  %.pre = load i8* %arrayidx5.phi.trans.insert, align 1
>>> +  br label %for.body3
>>> +
>>> +for.body:                                         ; preds = %for.body,
%entry
>>> +  %indvars.iv29 = phi i64 [ 0, %entry ], [ %indvars.iv.next30,
%for.body ]
>>> +  call void (...)* @bar() nounwind
>>> +  %arrayidx = getelementptr inbounds [10 x i8]* %a, i64 0, i64
%indvars.iv29
>>> +  store i8 23, i8* %arrayidx, align 1
>>> +  %indvars.iv.next30 = add i64 %indvars.iv29, 1
>>> +  %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
>>> +  %exitcond32 = icmp eq i32 %lftr.wideiv31, 1000000
>>> +  br i1 %exitcond32, label %for.cond1.preheader, label %for.body
>>> +
>>> +for.body3:                                        ; preds =
%for.body3, %for.cond1.preheader
>>> +  %0 = phi i8 [ %.pre, %for.cond1.preheader ], [ %add, %for.body3 ]
>>> +  %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [
%indvars.iv.next, %for.body3 ]
>>> +  call void (...)* @bar() nounwind
>>> +  %arrayidx7 = getelementptr inbounds [10 x i8]* %a, i64 0, i64
%indvars.iv
>>> +  %1 = load i8* %arrayidx7, align 1
>>> +  %add = add i8 %1, %0
>>> +  store i8 %add, i8* %arrayidx7, align 1
>>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>>> +  %exitcond = icmp eq i32 %lftr.wideiv, 1000000
>>> +  br i1 %exitcond, label %for.end12, label %for.body3
>>> +
>>> +for.end12:                                        ; preds = %for.body3
>>> +  %arrayidx13 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 2
>>> +  %2 = load i8* %arrayidx13, align 1
>>> +  %conv14 = sext i8 %2 to i32
>>> +  %arrayidx15 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 6
>>> +  %3 = load i8* %arrayidx15, align 1
>>> +  %conv16 = sext i8 %3 to i32
>>> +  %add17 = add nsw i32 %conv16, %conv14
>>> +  ret i32 %add17
>>> +
>>> +; CHECK: @test1
>>> +; CHECK-NOT: @llvm.memset
>>> +}
>>> +
>>> +declare void @bar(...)
>>>
>>>
>>>
>>>
>>> ------------------------------
>>>
>>> Message: 19
>>> Date: Sat, 27 Oct 2012 14:25:51 -0000
>>> From: Benjamin Kramer <benny.kra at googlemail.com>
>>> To: llvm-commits at cs.uiuc.edu
>>> Subject: [llvm-commits] [llvm] r166875 - in /llvm/trunk:
>>>        lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>>>        test/Transforms/LoopIdiom/basic.ll
>>> Message-ID: <20121027142551.C47DF2A6C065 at llvm.org>
>>> Content-Type: text/plain; charset="utf-8"
>>>
>>> Author: d0k
>>> Date: Sat Oct 27 09:25:51 2012
>>> New Revision: 166875
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=166875&view=rev
>>> Log:
>>> LoopIdiom: Recognize memmove loops.
>>>
>>> This turns loops like
>>>  for (unsigned i = 0; i != n; ++i)
>>>    p[i] = p[i+1];
>>> into memmove, which has a highly optimized implementation in most libcs.
>>>
>>> This was really easy with the new DependenceAnalysis :)
>>>
>>> Modified:
>>>    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>>>    llvm/trunk/test/Transforms/LoopIdiom/basic.ll
>>>
>>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>>> URL:
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166875&r1=166874&r2=166875&view=diff
>>>
==============================================================================
>>> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
>>> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Oct 27
09:25:51 2012
>>> @@ -16,7 +16,7 @@
>>> // TODO List:
>>> //
>>> // Future loop memory idioms to recognize:
>>> -//   memcmp, memmove, strlen, etc.
>>> +//   memcmp, strlen, etc.
>>> // Future floating point idioms to recognize in -ffast-math mode:
>>> //   fpowi
>>> // Future integer operation idioms to recognize:
>>> @@ -60,8 +60,9 @@
>>> #include "llvm/Transforms/Utils/Local.h"
>>> using namespace llvm;
>>>
>>> -STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
>>> -STATISTIC(NumMemCpy, "Number of memcpy's formed from loop
load+stores");
>>> +STATISTIC(NumMemSet, "Number of memsets formed from loop stores");
>>> +STATISTIC(NumMemCpy, "Number of memcpys formed from loop load+stores");
>>> +STATISTIC(NumMemMove, "Number of memmoves formed from loop
load+stores");
>>>
>>> namespace {
>>>   class LoopIdiomRecognize : public LoopPass {
>>> @@ -532,6 +533,7 @@
>>>   // stores) in the loop. We ignore the direct dependency between SI
and LI here
>>>   // and check it later.
>>>   DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();
>>> +  bool isMemcpySafe = true;
>>>   for (Loop::block_iterator BI = CurLoop->block_begin(),
>>>        BE = CurLoop->block_end(); BI != BE; ++BI)
>>>     for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I
!= E; ++I)
>>> @@ -552,8 +554,14 @@
>>>   // Now check the dependency between SI and LI. If there is no
dependency we
>>>   // can safely emit a memcpy.
>>>   OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));
>>> -  if (Dep)
>>> -    return false;
>>> +  if (Dep) {
>>> +    // If there is a dependence but the direction is positive we can
still
>>> +    // safely turn this into memmove.
>>> +    if (Dep->getLevels() != 1 ||
>>> +        Dep->getDirection(1) != Dependence::DVEntry::GT)
>>> +      return false;
>>
>> Why the restriction to an un-nested loop? Couldn't this work for a
>> deeply-nested loop?
>
> LI and SI are always in the same loop, so the level cannot be != 1 here,
right?

The level of a dependence is the number of enclosing loops that
the two instructions have in common. If LI and SI are always in the
same loop, then the level is exactly the number of loops enclosing them.
For a more complex example, see DependenceAnalysis.h, near line 480.

Preston
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20121030/491d2443/attachment.html>