[llvm-commits] [llvm] r166874 - in /llvm/trunk: lib/Transforms/Scalar/LoopIdiomRecognize.cpp test/Transforms/LoopIdiom/multi-dimensional.ll test/Transforms/LoopIdiom/sideeffect.ll

Preston Briggs preston.briggs at gmail.com
Mon Oct 29 11:10:59 PDT 2012


I don't understand all this code, but I have a few comments inline.

> Author: d0k
> Date: Sat Oct 27 09:25:44 2012
> New Revision: 166874
>
> URL: http://llvm.org/viewvc/llvm-project?rev=166874&view=rev
> Log:
> LoopIdiom: Replace custom dependence analysis with DependenceAnalysis.

:-)

> Requires a lot less code and complexity on loop-idiom's side and the more
> precise analysis can catch more cases, like the one I included as a test case.
> This also fixes the edge-case miscompilation from PR9481.
>
> Compile time performance seems to be slightly worse, but this is mostly due
> to an extra LCSSA run scheduled by the PassManager and should be fixed there.
>
> Added:
>     llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll
>     llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll
> Modified:
>     llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>
> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166874&r1=166873&r2=166874&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Oct 27 09:25:44 2012
> @@ -48,6 +48,7 @@
>  #include "llvm/Module.h"
>  #include "llvm/ADT/Statistic.h"
>  #include "llvm/Analysis/AliasAnalysis.h"
> +#include "llvm/Analysis/DependenceAnalysis.h"
>  #include "llvm/Analysis/LoopPass.h"
>  #include "llvm/Analysis/ScalarEvolutionExpander.h"
>  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
> @@ -106,6 +107,8 @@
>        AU.addPreserved<AliasAnalysis>();
>        AU.addRequired<ScalarEvolution>();
>        AU.addPreserved<ScalarEvolution>();
> +      AU.addRequired<DependenceAnalysis>();
> +      AU.addPreserved<DependenceAnalysis>();
>        AU.addPreserved<DominatorTree>();
>        AU.addRequired<DominatorTree>();
>        AU.addRequired<TargetLibraryInfo>();
> @@ -122,6 +125,7 @@
>  INITIALIZE_PASS_DEPENDENCY(LCSSA)
>  INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
>  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
> +INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
>  INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
>  INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
>                      false, false)
> @@ -163,15 +167,6 @@
>    } while (!NowDeadInsts.empty());
>  }
>
> -/// deleteIfDeadInstruction - If the specified value is a dead instruction,
> -/// delete it and any recursively used instructions.
> -static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
> -                                    const TargetLibraryInfo *TLI) {
> -  if (Instruction *I = dyn_cast<Instruction>(V))
> -    if (isInstructionTriviallyDead(I, TLI))
> -      deleteDeadInstruction(I, SE, TLI);
> -}
> -
>  bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
>    CurLoop = L;
>
> @@ -368,40 +363,6 @@
>                                   MSI, Ev, BECount);
>  }
>
> -
> -/// mayLoopAccessLocation - Return true if the specified loop might access the
> -/// specified pointer location, which is a loop-strided access.  The 'Access'
> -/// argument specifies what the verboten forms of access are (read or write).
> -static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
> -                                  Loop *L, const SCEV *BECount,
> -                                  unsigned StoreSize, AliasAnalysis &AA,
> -                                  Instruction *IgnoredStore) {
> -  // Get the location that may be stored across the loop.  Since the access is
> -  // strided positively through memory, we say that the modified location starts
> -  // at the pointer and has infinite size.
> -  uint64_t AccessSize = AliasAnalysis::UnknownSize;
> -
> -  // If the loop iterates a fixed number of times, we can refine the access size
> -  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
> -  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
> -    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
> -
> -  // TODO: For this to be really effective, we have to dive into the pointer
> -  // operand in the store.  Store to &A[i] of 100 will always return may alias
> -  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
> -  // which will then no-alias a store to &A[100].
> -  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
> -
> -  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
> -       ++BI)
> -    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
> -      if (&*I != IgnoredStore &&
> -          (AA.getModRefInfo(I, StoreLoc) & Access))
> -        return true;
> -
> -  return false;
> -}
> -
>  /// getMemSetPatternValue - If a strided store of the specified value is safe to
>  /// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
>  /// be passed in.  Otherwise, return null.
> @@ -474,6 +435,18 @@
>      return false;
>    }
>
> +  // Make sure the store has no dependencies (i.e. other loads and stores) in
> +  // the loop.
> +  DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();
> +  for (Loop::block_iterator BI = CurLoop->block_begin(),
> +       BE = CurLoop->block_end(); BI != BE; ++BI)
> +    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
> +      if (&*I != TheStore && I->mayReadOrWriteMemory()) {
> +        OwningPtr<Dependence> D(DA.depends(TheStore, I, true));
> +        if (D)
> +          return false;

This seems pessimistic. If the candidate loop is nested, we could
still find some idioms in the presence of dependences. We should check
to see if there's a loop-independent dependence or if there's a
dependence at the innermost level.


> +      }
> +
>    // The trip count of the loop and the base pointer of the addrec SCEV is
>    // guaranteed to be loop invariant, which means that it should dominate the
>    // header.  This allows us to insert code for it in the preheader.
> @@ -484,8 +457,7 @@
>    // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
>    // this into a memset in the loop preheader now if we want.  However, this
>    // would be unsafe to do if there is anything else in the loop that may read
> -  // or write to the aliased location.  Check for any overlap by generating the
> -  // base pointer and checking the region.
> +  // or write to the aliased location.
>    assert(DestPtr->getType()->isPointerTy()
>        && "Must be a pointer type.");
>    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
> @@ -494,15 +466,6 @@
>                             Preheader->getTerminator());
>
>
> -  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
> -                            CurLoop, BECount,
> -                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
> -    Expander.clear();
> -    // If we generated new code for the base pointer, clean up.
> -    deleteIfDeadInstruction(BasePtr, *SE, TLI);
> -    return false;
> -  }
> -
>    // Okay, everything looks good, insert the memset.
>
>    // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
> @@ -565,6 +528,33 @@
>
>    LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
>
> +  // Make sure the load and the store have no dependencies (i.e. other loads and
> +  // stores) in the loop. We ignore the direct dependency between SI and LI here
> +  // and check it later.
> +  DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();
> +  for (Loop::block_iterator BI = CurLoop->block_begin(),
> +       BE = CurLoop->block_end(); BI != BE; ++BI)
> +    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
> +      if (&*I != SI && &*I != LI && I->mayReadOrWriteMemory()) {
> +        // First, check if there is a dependence of the store.
> +        OwningPtr<Dependence> DS(DA.depends(SI, I, true));
> +        if (DS)
> +          return false;
> +        // If the scanned instructon may modify memory then we also have to
> +        // check for dependencys on the load.
> +        if (I->mayWriteToMemory()) {
> +          OwningPtr<Dependence> DL(DA.depends(I, LI, true));
> +          if (DL)
> +            return false;
> +        }
> +      }
> +
> +  // Now check the dependency between SI and LI. If there is no dependency we
> +  // can safely emit a memcpy.
> +  OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));
> +  if (Dep)
> +    return false;
> +
>    // The trip count of the loop and the base pointer of the addrec SCEV is
>    // guaranteed to be loop invariant, which means that it should dominate the
>    // header.  This allows us to insert code for it in the preheader.
> @@ -573,41 +563,16 @@
>    SCEVExpander Expander(*SE, "loop-idiom");
>
>    // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
> -  // this into a memcpy in the loop preheader now if we want.  However, this
> -  // would be unsafe to do if there is anything else in the loop that may read
> -  // or write the memory region we're storing to.  This includes the load that
> -  // feeds the stores.  Check for an alias by generating the base address and
> -  // checking everything.
> +  // this into a memcpy in the loop preheader now if we want.
>    Value *StoreBasePtr =
>      Expander.expandCodeFor(StoreEv->getStart(),
>                             Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
>                             Preheader->getTerminator());
> -
> -  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
> -                            CurLoop, BECount, StoreSize,
> -                            getAnalysis<AliasAnalysis>(), SI)) {
> -    Expander.clear();
> -    // If we generated new code for the base pointer, clean up.
> -    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
> -    return false;
> -  }
> -
> -  // For a memcpy, we have to make sure that the input array is not being
> -  // mutated by the loop.
>    Value *LoadBasePtr =
>      Expander.expandCodeFor(LoadEv->getStart(),
>                             Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
>                             Preheader->getTerminator());
>
> -  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
> -                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
> -    Expander.clear();
> -    // If we generated new code for the base pointer, clean up.
> -    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
> -    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
> -    return false;
> -  }
> -
>    // Okay, everything is safe, we can transform this!
>
>
>
> Added: llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll?rev=166874&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll (added)
> +++ llvm/trunk/test/Transforms/LoopIdiom/multi-dimensional.ll Sat Oct 27 09:25:44 2012
> @@ -0,0 +1,49 @@
> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
> +target triple = "x86_64-apple-darwin10.0.0"
> +
> +%struct.ham = type { [2 x [2 x [2 x [16 x [8 x i32]]]]], i32, %struct.zot }
> +%struct.zot = type { i32, i16, i16, [2 x [1152 x i32]] }
> +
> +define void @test1(%struct.ham* nocapture %arg) nounwind {
> +bb:
> +  br label %bb1
> +
> +bb1:                                              ; preds = %bb11, %bb
> +  %tmp = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
> +  br label %bb2
> +
> +bb2:                                              ; preds = %bb2, %bb1
> +  %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp8, %bb2 ]
> +  %tmp4 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 1, i64 %tmp, i64 %tmp3
> +  store i32 0, i32* %tmp4, align 4
> +  %tmp5 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 0, i64 %tmp, i64 %tmp3
> +  store i32 0, i32* %tmp5, align 4
> +  %tmp6 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 1, i64 %tmp, i64 %tmp3
> +  store i32 0, i32* %tmp6, align 4
> +  %tmp7 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 0, i64 %tmp, i64 %tmp3
> +  store i32 0, i32* %tmp7, align 4
> +  %tmp8 = add i64 %tmp3, 1
> +  %tmp9 = trunc i64 %tmp8 to i32
> +  %tmp10 = icmp eq i32 %tmp9, 8
> +  br i1 %tmp10, label %bb11, label %bb2
> +
> +bb11:                                             ; preds = %bb2
> +  %tmp12 = add i64 %tmp, 1
> +  %tmp13 = trunc i64 %tmp12 to i32
> +  %tmp14 = icmp eq i32 %tmp13, 16
> +  br i1 %tmp14, label %bb15, label %bb1
> +
> +bb15:                                             ; preds = %bb11
> +  ret void
> +
> +; CHECK: @test1
> +; CHECK: bb1:
> +; CHECK-NOT: store
> +; CHECK: call void @llvm.memset.p0i8.i64
> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64
> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64
> +; CHECK-NEXT: call void @llvm.memset.p0i8.i64
> +; CHECK-NOT: store
> +; CHECK: br
> +}
>
> Added: llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll?rev=166874&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll (added)
> +++ llvm/trunk/test/Transforms/LoopIdiom/sideeffect.ll Sat Oct 27 09:25:44 2012
> @@ -0,0 +1,53 @@
> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
> +target triple = "x86_64-apple-darwin10.0.0"
> +
> +; PR9481
> +define i32 @test1() nounwind uwtable ssp {
> +entry:
> +  %a = alloca [10 x i8], align 1
> +  br label %for.body
> +
> +for.cond1.preheader:                              ; preds = %for.body
> +  %arrayidx5.phi.trans.insert = getelementptr inbounds [10 x i8]* %a, i64 0, i64 0
> +  %.pre = load i8* %arrayidx5.phi.trans.insert, align 1
> +  br label %for.body3
> +
> +for.body:                                         ; preds = %for.body, %entry
> +  %indvars.iv29 = phi i64 [ 0, %entry ], [ %indvars.iv.next30, %for.body ]
> +  call void (...)* @bar() nounwind
> +  %arrayidx = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv29
> +  store i8 23, i8* %arrayidx, align 1
> +  %indvars.iv.next30 = add i64 %indvars.iv29, 1
> +  %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
> +  %exitcond32 = icmp eq i32 %lftr.wideiv31, 1000000
> +  br i1 %exitcond32, label %for.cond1.preheader, label %for.body
> +
> +for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
> +  %0 = phi i8 [ %.pre, %for.cond1.preheader ], [ %add, %for.body3 ]
> +  %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
> +  call void (...)* @bar() nounwind
> +  %arrayidx7 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv
> +  %1 = load i8* %arrayidx7, align 1
> +  %add = add i8 %1, %0
> +  store i8 %add, i8* %arrayidx7, align 1
> +  %indvars.iv.next = add i64 %indvars.iv, 1
> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> +  %exitcond = icmp eq i32 %lftr.wideiv, 1000000
> +  br i1 %exitcond, label %for.end12, label %for.body3
> +
> +for.end12:                                        ; preds = %for.body3
> +  %arrayidx13 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 2
> +  %2 = load i8* %arrayidx13, align 1
> +  %conv14 = sext i8 %2 to i32
> +  %arrayidx15 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 6
> +  %3 = load i8* %arrayidx15, align 1
> +  %conv16 = sext i8 %3 to i32
> +  %add17 = add nsw i32 %conv16, %conv14
> +  ret i32 %add17
> +
> +; CHECK: @test1
> +; CHECK-NOT: @llvm.memset
> +}
> +
> +declare void @bar(...)
>
>
>
>
> ------------------------------
>
> Message: 19
> Date: Sat, 27 Oct 2012 14:25:51 -0000
> From: Benjamin Kramer <benny.kra at googlemail.com>
> To: llvm-commits at cs.uiuc.edu
> Subject: [llvm-commits] [llvm] r166875 - in /llvm/trunk:
>         lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>         test/Transforms/LoopIdiom/basic.ll
> Message-ID: <20121027142551.C47DF2A6C065 at llvm.org>
> Content-Type: text/plain; charset="utf-8"
>
> Author: d0k
> Date: Sat Oct 27 09:25:51 2012
> New Revision: 166875
>
> URL: http://llvm.org/viewvc/llvm-project?rev=166875&view=rev
> Log:
> LoopIdiom: Recognize memmove loops.
>
> This turns loops like
>   for (unsigned i = 0; i != n; ++i)
>     p[i] = p[i+1];
> into memmove, which has a highly optimized implementation in most libcs.
>
> This was really easy with the new DependenceAnalysis :)
>
> Modified:
>     llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>     llvm/trunk/test/Transforms/LoopIdiom/basic.ll
>
> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=166875&r1=166874&r2=166875&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Oct 27 09:25:51 2012
> @@ -16,7 +16,7 @@
>  // TODO List:
>  //
>  // Future loop memory idioms to recognize:
> -//   memcmp, memmove, strlen, etc.
> +//   memcmp, strlen, etc.
>  // Future floating point idioms to recognize in -ffast-math mode:
>  //   fpowi
>  // Future integer operation idioms to recognize:
> @@ -60,8 +60,9 @@
>  #include "llvm/Transforms/Utils/Local.h"
>  using namespace llvm;
>
> -STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
> -STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
> +STATISTIC(NumMemSet, "Number of memsets formed from loop stores");
> +STATISTIC(NumMemCpy, "Number of memcpys formed from loop load+stores");
> +STATISTIC(NumMemMove, "Number of memmoves formed from loop load+stores");
>
>  namespace {
>    class LoopIdiomRecognize : public LoopPass {
> @@ -532,6 +533,7 @@
>    // stores) in the loop. We ignore the direct dependency between SI and LI here
>    // and check it later.
>    DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>();
> +  bool isMemcpySafe = true;
>    for (Loop::block_iterator BI = CurLoop->block_begin(),
>         BE = CurLoop->block_end(); BI != BE; ++BI)
>      for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
> @@ -552,8 +554,14 @@
>    // Now check the dependency between SI and LI. If there is no dependency we
>    // can safely emit a memcpy.
>    OwningPtr<Dependence> Dep(DA.depends(SI, LI, true));
> -  if (Dep)
> -    return false;
> +  if (Dep) {
> +    // If there is a dependence but the direction is positive we can still
> +    // safely turn this into memmove.
> +    if (Dep->getLevels() != 1 ||
> +        Dep->getDirection(1) != Dependence::DVEntry::GT)
> +      return false;

Why the restriction to an un-nested loop? Couldn't this work for a
deeply-nested loop?

> +    isMemcpySafe = false;
> +  }
>
>    // The trip count of the loop and the base pointer of the addrec SCEV is
>    // guaranteed to be loop invariant, which means that it should dominate the
> @@ -590,12 +598,19 @@
>    Value *NumBytes =
>      Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
>
> -  CallInst *NewCall =
> -    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
> -                         std::min(SI->getAlignment(), LI->getAlignment()));
> +  CallInst *NewCall;
> +  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
> +  if (isMemcpySafe) {
> +    NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
> +    ++NumMemCpy;
> +  } else {
> +    NewCall = Builder.CreateMemMove(StoreBasePtr, LoadBasePtr, NumBytes, Align);
> +    ++NumMemMove;
> +  }
>    NewCall->setDebugLoc(SI->getDebugLoc());
>
> -  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
> +  DEBUG(dbgs() << "  Formed " << (isMemcpySafe ? "memcpy: " : "memmove: ")
> +               << *NewCall << "\n"
>                 << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
>                 << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
>
> @@ -603,6 +618,5 @@
>    // Okay, the memset has been formed.  Zap the original store and anything that
>    // feeds into it.
>    deleteDeadInstruction(SI, *SE, TLI);
> -  ++NumMemCpy;
>    return true;
>  }
>
> Modified: llvm/trunk/test/Transforms/LoopIdiom/basic.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/basic.ll?rev=166875&r1=166874&r2=166875&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopIdiom/basic.ll (original)
> +++ llvm/trunk/test/Transforms/LoopIdiom/basic.ll Sat Oct 27 09:25:51 2012
> @@ -383,4 +383,26 @@
>
>  }
>
> + at p = common global [1024 x i8] zeroinitializer, align 16
>
> +define void @test15(i32 %n) nounwind {
> +entry:
> +  %cmp6 = icmp eq i32 %n, 0
> +  br i1 %cmp6, label %for.end, label %for.body
> +
> +for.body:                                         ; preds = %entry, %for.body
> +  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
> +  %indvars.iv.next = add i64 %indvars.iv, 1
> +  %arrayidx = getelementptr inbounds [1024 x i8]* @p, i64 0, i64 %indvars.iv.next
> +  %0 = load i8* %arrayidx, align 1
> +  %arrayidx2 = getelementptr inbounds [1024 x i8]* @p, i64 0, i64 %indvars.iv
> +  store i8 %0, i8* %arrayidx2, align 1
> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> +  %exitcond = icmp eq i32 %lftr.wideiv, %n
> +  br i1 %exitcond, label %for.end, label %for.body
> +
> +for.end:                                          ; preds = %for.body, %entry
> +  ret void
> +; CHECK: @test15
> +; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([1024 x i8]* @p, i32 0, i32 0), i8* getelementptr inbounds ([1024 x i8]* @p, i64 0, i64 1),
> +}



More information about the llvm-commits mailing list