[llvm-commits] [llvm] r50034 - in /llvm/trunk: lib/Transforms/Scalar/MemCpyOptimizer.cpp test/Transforms/MemCpyOpt/form-memset.ll test/Transforms/MemCpyOpt/form-memset2.ll
Owen Anderson
resistor at mac.com
Mon Apr 21 00:45:10 PDT 2008
Author: resistor
Date: Mon Apr 21 02:45:10 2008
New Revision: 50034
URL: http://llvm.org/viewvc/llvm-project?rev=50034&view=rev
Log:
Refactor memcpyopt based on Chris' suggestions. Consolidate several functions
and simplify code that was fallout from the separation of memcpyopt and gvn.
Modified:
llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll
llvm/trunk/test/Transforms/MemCpyOpt/form-memset2.ll
Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=50034&r1=50033&r2=50034&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Mon Apr 21 02:45:10 2008
@@ -14,23 +14,14 @@
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/ParameterAttributes.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
@@ -40,13 +31,6 @@
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
-namespace {
- cl::opt<bool>
- FormMemSet("form-memset-from-stores",
- cl::desc("Transform straight-line stores to memsets"),
- cl::init(true), cl::Hidden);
-}
-
/// isBytewiseValue - If the specified value can be set by repeating the same
/// byte in memory, return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
@@ -332,13 +316,9 @@
}
// Helper fuctions
- bool processInstruction(Instruction* I,
- SmallVectorImpl<Instruction*> &toErase);
- bool processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase);
- bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
- SmallVectorImpl<Instruction*> &toErase);
- bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
- SmallVectorImpl<Instruction*> &toErase);
+ bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
+ bool processMemCpy(MemCpyInst* M);
+ bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
bool iterateOnFunction(Function &F);
};
@@ -357,8 +337,7 @@
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consequtive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
- if (!FormMemSet) return false;
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
if (SI->isVolatile()) return false;
// There are two cases that are interesting for this code to handle: memcpy
@@ -473,8 +452,13 @@
cerr << *Range.TheStores[i];
cerr << "With: " << *C); C=C;
+ // Don't invalidate the iterator
+ BBI = BI;
+
// Zap all the stores.
- toErase.append(Range.TheStores.begin(), Range.TheStores.end());
+ for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(),
+ SE = Range.TheStores.end(); SI != SE; ++SI)
+ (*SI)->eraseFromParent();
++NumMemSetInfer;
MadeChange = true;
}
@@ -486,8 +470,7 @@
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
- SmallVectorImpl<Instruction*> &toErase) {
+bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -612,7 +595,8 @@
// Remove the memcpy
MD.removeInstruction(cpy);
- toErase.push_back(cpy);
+ cpy->eraseFromParent();
+ NumMemCpyInstr++;
return true;
}
@@ -621,8 +605,23 @@
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
/// This allows later passes to remove the first memcpy altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
- SmallVectorImpl<Instruction*> &toErase) {
+bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+ // There are two possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundancy for DSE
+ // b) call-memcpy xform for return slot optimization
+ Instruction* dep = MD.getDependency(M);
+ if (dep == MemoryDependenceAnalysis::None ||
+ dep == MemoryDependenceAnalysis::NonLocal)
+ return false;
+ else if (CallInst* C = dyn_cast<CallInst>(dep))
+ return performCallSlotOptzn(M, C);
+ else if (!isa<MemCpyInst>(dep))
+ return false;
+
+ MemCpyInst* MDep = cast<MemCpyInst>(dep);
+
// We can only transforms memcpy's where the dest of one is the source of the
// other
if (M->getSource() != MDep->getDest())
@@ -667,41 +666,16 @@
CallInst* C = CallInst::Create(MemCpyFun, args.begin(), args.end(), "", M);
- MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
if (MD.getDependency(C) == MDep) {
MD.dropInstruction(M);
- toErase.push_back(M);
+ M->eraseFromParent();
return true;
}
MD.removeInstruction(C);
- toErase.push_back(C);
- return false;
-}
-
-/// processInstruction - When calculating availability, handle an instruction
-/// by inserting it into the appropriate sets
-bool MemCpyOpt::processInstruction(Instruction *I,
- SmallVectorImpl<Instruction*> &toErase) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return processStore(SI, toErase);
-
- if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
- MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-
- // The are two possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE
- // b) call-memcpy xform for return slot optimization
- Instruction* dep = MD.getDependency(M);
- if (dep == MemoryDependenceAnalysis::None ||
- dep == MemoryDependenceAnalysis::NonLocal)
- return false;
- if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
- return processMemCpy(M, MemCpy, toErase);
- if (CallInst* C = dyn_cast<CallInst>(dep))
- return performCallSlotOptzn(M, C, toErase);
- return false;
- }
+ C->eraseFromParent();
+
+ NumMemCpyInstr++;
return false;
}
@@ -726,42 +700,20 @@
// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool changed_function = false;
-
- DominatorTree &DT = getAnalysis<DominatorTree>();
-
- SmallVector<Instruction*, 8> toErase;
-
- // Top-down walk of the dominator tree
- for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
- E = df_end(DT.getRootNode()); DI != E; ++DI) {
- BasicBlock* BB = DI->getBlock();
+ // Walk all instructions in the function
+ for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- changed_function |= processInstruction(BI, toErase);
- if (toErase.empty()) {
- ++BI;
- continue;
- }
-
- // If we need some instructions deleted, do it now.
- NumMemCpyInstr += toErase.size();
+ // Avoid invalidating the iterator
+ Instruction* I = BI++;
- // Avoid iterator invalidation.
- bool AtStart = BI == BB->begin();
- if (!AtStart)
- --BI;
-
- for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
- E = toErase.end(); I != E; ++I)
- (*I)->eraseFromParent();
-
- if (AtStart)
- BI = BB->begin();
- else
- ++BI;
-
- toErase.clear();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ changed_function |= processStore(SI, BI);
+
+ if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+ changed_function |= processMemCpy(M);
+ }
}
}
Modified: llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll?rev=50034&r1=50033&r2=50034&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll (original)
+++ llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll Mon Apr 21 02:45:10 2008
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -memcpyopt -form-memset-from-stores | llvm-dis | not grep store
-; RUN: llvm-as < %s | opt -memcpyopt -form-memset-from-stores | llvm-dis | grep {call.*llvm.memset}
+; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | not grep store
+; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | grep {call.*llvm.memset}
; All the stores in this example should be merged into a single memset.
Modified: llvm/trunk/test/Transforms/MemCpyOpt/form-memset2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/form-memset2.ll?rev=50034&r1=50033&r2=50034&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/form-memset2.ll (original)
+++ llvm/trunk/test/Transforms/MemCpyOpt/form-memset2.ll Mon Apr 21 02:45:10 2008
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -memcpyopt -form-memset-from-stores | llvm-dis | not grep store
-; RUN: llvm-as < %s | opt -memcpyopt -form-memset-from-stores | llvm-dis | grep {call.*llvm.memset} | count 3
+; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | not grep store
+; RUN: llvm-as < %s | opt -memcpyopt | llvm-dis | grep {call.*llvm.memset} | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
More information about the llvm-commits mailing list