[llvm-commits] [llvm] r46693 - in /llvm/trunk: lib/Transforms/Scalar/GVN.cpp test/Transforms/GVN/memcpy.ll
Owen Anderson
resistor at mac.com
Sun Feb 3 18:59:58 PST 2008
Author: resistor
Date: Sun Feb 3 20:59:58 2008
New Revision: 46693
URL: http://llvm.org/viewvc/llvm-project?rev=46693&view=rev
Log:
Allow GVN to hack on memcpy's, making them open to further optimization.
Added:
llvm/trunk/test/Transforms/GVN/memcpy.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/GVN.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=46693&r1=46692&r2=46693&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Sun Feb 3 20:59:58 2008
@@ -19,6 +19,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/Value.h"
#include "llvm/ADT/BitVector.h"
@@ -736,6 +737,7 @@
SmallVector<Instruction*, 4>& toErase);
bool processNonLocalLoad(LoadInst* L,
SmallVector<Instruction*, 4>& toErase);
+ bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
DenseMap<BasicBlock*, Value*> &Phis,
bool top_level = false);
@@ -1017,6 +1019,84 @@
return deletedLoad;
}
+/// processMemCpy - Perform simplification of memcpy's. If we have memcpy A which
+/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
+/// a memcpy from X to Z (or a memmove, if Z may alias X). This allows later
+/// passes to remove the first memcpy altogether. Returns true if M was changed.
+bool GVN::processMemCpy(MemCpyInst* M,
+ SmallVector<Instruction*, 4>& toErase) {
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+ // First, we have to check that the dependency of M is another memcpy.
+ Instruction* dep = MD.getDependency(M);
+ if (dep == MemoryDependenceAnalysis::None ||
+ dep == MemoryDependenceAnalysis::NonLocal ||
+ !isa<MemCpyInst>(dep))
+ return false;
+
+ // We can only transform memcpy's where the dest of the first (MDep) is the
+ // source of the second (M).
+ MemCpyInst* MDep = cast<MemCpyInst>(dep);
+ if (M->getSource() != MDep->getDest())
+ return false;
+
+ // Second, the lengths of both memcpy's must be constants, and the preceding
+ // one must be at least as large as the following one.
+ Value* DepLength = MDep->getLength();
+ uint64_t CpySize = ~0UL;
+ uint64_t DepSize = ~0UL;
+ if (isa<ConstantInt>(DepLength)) {
+ if (isa<ConstantInt>(M->getLength())) {
+ if (cast<ConstantInt>(DepLength)->getLimitedValue() <
+ cast<ConstantInt>(M->getLength())->getLimitedValue()) {
+ return false;
+ } else {
+ CpySize = cast<ConstantInt>(M->getLength())->getLimitedValue();
+ DepSize = cast<ConstantInt>(DepLength)->getLimitedValue();
+ }
+ } else {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ // Finally, we have to check whether the dest of the second may alias the
+ // source of the first.
+ AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
+ AliasAnalysis::NoAlias) {
+ // If they may alias, we can still make the transformation, but a memmove
+ // call is created in place of M, and M is queued in toErase.
+ bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32;
+ Function* MemMoveFun = Intrinsic::getDeclaration(
+ M->getParent()->getParent()->getParent(),
+ is32bit ? Intrinsic::memmove_i32 :
+ Intrinsic::memmove_i64);
+
+ std::vector<Value*> args;
+ args.push_back(M->getRawDest());
+ args.push_back(MDep->getRawSource());
+ args.push_back(M->getLength());
+ args.push_back(M->getAlignment());
+
+ new CallInst(MemMoveFun, args.begin(), args.end(), "", M);
+
+ MD.removeInstruction(M);
+ toErase.push_back(M);
+
+ return true;
+ }
+
+ // No aliasing: make M copy directly from the source of the first memcpy.
+ M->setSource(MDep->getRawSource());
+
+ // Reset dependence information for the memcpy
+ MD.removeInstruction(M);
+
+ return true;
+}
+
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction* I,
@@ -1025,6 +1105,8 @@
SmallVector<Instruction*, 4>& toErase) {
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
return processLoad(L, lastSeenLoad, toErase);
+ } else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+ return processMemCpy(M, toErase);
}
unsigned num = VN.lookup_or_add(I);
Added: llvm/trunk/test/Transforms/GVN/memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/memcpy.ll?rev=46693&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/GVN/memcpy.ll (added)
+++ llvm/trunk/test/Transforms/GVN/memcpy.ll Sun Feb 3 20:59:58 2008
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | not grep {i8* %agg.result21, i8* %tmp219}
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9"
+
+define void @ccosl({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind {
+entry:
+ %tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
+ %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
+ %tmp5 = sub x86_fp80 0xK80000000000000000000, %z.1 ; <x86_fp80> [#uses=1]
+ call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind
+ %tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; <i8*> [#uses=2]
+ %memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 ) ; copy #1: %memtmp -> %tmp2
+ %agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 ) ; copy #2: %tmp2 -> %agg.result, reads the dest of copy #1
+ ret void
+}
+
+declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
More information about the llvm-commits
mailing list