[llvm-commits] [llvm] r119682 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/memcpy-from-global.ll

Wed Nov 17 22:20:47 PST 2010

Author: lattner
Date: Thu Nov 18 00:20:47 2010
New Revision: 119682

URL: http://llvm.org/viewvc/llvm-project?rev=119682&view=rev
Log:
fix a small oversight in the "eliminate memcpy from constant global"
optimization.  If the alloca that is "memcpy'd from constant" also has
a memcpy from *it*, ignore it: it is a load.  We now optimize the testcase to:

define void @test2() {
  %B = alloca %T
  %a = bitcast %T* @G to i8*
  %b = bitcast %T* %B to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i32 4, i1 false)
  call void @bar(i8* %b)
  ret void
}

previously we would generate:

define void @test() {
  %B = alloca %T
  %b = bitcast %T* %B to i8*
  %G.0 = getelementptr inbounds %T* @G, i32 0, i32 0
  %tmp3 = load i8* %G.0, align 4
  %G.1 = getelementptr inbounds %T* @G, i32 0, i32 1
  %G.15 = bitcast [123 x i8]* %G.1 to i8*
  %1 = bitcast [123 x i8]* %G.1 to i984*
  %srcval = load i984* %1, align 1
  %B.0 = getelementptr inbounds %T* %B, i32 0, i32 0
  store i8 %tmp3, i8* %B.0, align 4
  %B.1 = getelementptr inbounds %T* %B, i32 0, i32 1
  %B.12 = bitcast [123 x i8]* %B.1 to i8*
  %2 = bitcast [123 x i8]* %B.1 to i984*
  store i984 %srcval, i984* %2, align 1
  call void @bar(i8* %b)
  ret void
}


Modified:
    llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
    llvm/trunk/test/Transforms/ScalarRepl/memcpy-from-global.ll

Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=119682&r1=119681&r2=119682&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Thu Nov 18 00:20:47 2010
@@ -1789,10 +1789,11 @@
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
     User *U = cast<Instruction>(*UI);
 
-    if (LoadInst *LI = dyn_cast<LoadInst>(U))
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       // Ignore non-volatile loads, they are always ok.
-      if (!LI->isVolatile())
-        continue;
+      if (LI->isVolatile()) return false;
+      continue;
+    }
     
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       // If uses of the bitcast are ok, we are ok.
@@ -1814,6 +1815,13 @@
     MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
     if (MI == 0)
       return false;
+    
+    // If the transfer is using the alloca as a source of the transfer, then
+    // it (unless the transfer is volatile).
+    if (UI.getOperandNo() == 1) {
+      if (MI->isVolatile()) return false;
+      continue;
+    }
 
     // If we already have seen a copy, reject the second one.
     if (TheCopy) return false;

Modified: llvm/trunk/test/Transforms/ScalarRepl/memcpy-from-global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/memcpy-from-global.ll?rev=119682&r1=119681&r2=119682&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/memcpy-from-global.ll (original)
+++ llvm/trunk/test/Transforms/ScalarRepl/memcpy-from-global.ll Thu Nov 18 00:20:47 2010
@@ -39,3 +39,32 @@
 }
 
 declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+%T = type { i8, [123 x i8] }
+
+ at G = constant %T {i8 1, [123 x i8] zeroinitializer }
+
+define void @test2() {
+  %A = alloca %T
+  %B = alloca %T
+  %a = bitcast %T* %A to i8*
+  %b = bitcast %T* %B to i8*
+
+; CHECK: @test2
+
+; %A alloca is deleted
+; CHECK-NEXT: %B = alloca %T
+
+; use @G instead of %A
+; CHECK-NEXT: %a = bitcast %T* @G to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i32 4, i1 false)
+  call void @bar(i8* %b)
+  ret void
+}
+
+declare void @bar(i8*)