[llvm-commits] [llvm] r123089 - in /llvm/trunk: lib/Transforms/Scalar/MemCpyOptimizer.cpp test/Transforms/MemCpyOpt/form-memset.ll

Chris Lattner sabre at nondot.org
Sat Jan 8 13:19:19 PST 2011


Author: lattner
Date: Sat Jan  8 15:19:19 2011
New Revision: 123089

URL: http://llvm.org/viewvc/llvm-project?rev=123089&view=rev
Log:
Merge memsets followed by neighboring memsets and other stores into
larger memsets.  Among other things, this fixes rdar://8760394 and
allows us to handle "Example 2" from http://blog.regehr.org/archives/320,
compiling it into a single 4096-byte memset:

_mad_synth_mute:                        ## @mad_synth_mute
## BB#0:                                ## %entry
	pushq	%rax
	movl	$4096, %esi             ## imm = 0x1000
	callq	___bzero
	popq	%rax
	ret


Modified:
    llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
    llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll

Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=123089&r1=123088&r2=123089&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Sat Jan  8 15:19:19 2011
@@ -322,6 +322,7 @@
   
     // Helper fuctions
     bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+    bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
     bool processMemCpy(MemCpyInst *M);
     bool processMemMove(MemMoveInst *M);
     bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
@@ -511,6 +512,17 @@
   return false;
 }
 
+bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+  // See if there is another memset or store neighboring this memset which
+  // allows us to widen out the memset to do a single larger store.
+  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
+                                            MSI->getValue())) {
+    BBI = I;  // Don't invalidate iterator.
+    return true;
+  }
+  return false;
+}
+
 
 /// performCallSlotOptzn - takes a memcpy and a call that it depends on,
 /// and checks for the possibility of a call slot optimization by having
@@ -775,6 +787,7 @@
       return true;
     }
   }
+  
   return false;
 }
 
@@ -884,11 +897,13 @@
       
       if (StoreInst *SI = dyn_cast<StoreInst>(I))
         MadeChange |= processStore(SI, BI);
-      else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) {
+      else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+        RepeatInstruction = processMemSet(M, BI);
+      else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
         RepeatInstruction = processMemCpy(M);
-      } else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
+      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
         RepeatInstruction = processMemMove(M);
-      } else if (CallSite CS = (Value*)I) {
+      else if (CallSite CS = (Value*)I) {
         for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
           if (CS.paramHasAttr(i+1, Attribute::ByVal))
             MadeChange |= processByValArgument(CS, i);

Modified: llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll?rev=123089&r1=123088&r2=123089&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll (original)
+++ llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll Sat Jan  8 15:19:19 2011
@@ -164,6 +164,7 @@
 declare void @foo(%struct.MV*, %struct.MV*, i8*)
 
 
+; Store followed by memset.
 define void @test3(i32* nocapture %P) nounwind ssp {
 entry:
   %arrayidx = getelementptr inbounds i32* %P, i64 1
@@ -177,6 +178,7 @@
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
 }
 
+; store followed by memset, different offset scenario
 define void @test4(i32* nocapture %P) nounwind ssp {
 entry:
   store i32 0, i32* %P, align 4
@@ -191,4 +193,30 @@
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
+; Memset followed by store.
+define void @test5(i32* nocapture %P) nounwind ssp {
+entry:
+  %add.ptr = getelementptr inbounds i32* %P, i64 2
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
+  %arrayidx = getelementptr inbounds i32* %P, i64 1
+  store i32 0, i32* %arrayidx, align 4
+  ret void
+; CHECK: @test5
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
+}
+
+;; Memset followed by memset.
+define void @test6(i32* nocapture %P) nounwind ssp {
+entry:
+  %0 = bitcast i32* %P to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false)
+  %add.ptr = getelementptr inbounds i32* %P, i64 3
+  %1 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
+  ret void
+; CHECK: @test6
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
+}
 





More information about the llvm-commits mailing list