[llvm-commits] [llvm] r137888 - in /llvm/trunk: lib/Transforms/Scalar/DeadStoreElimination.cpp lib/Transforms/Scalar/GVN.cpp lib/Transforms/Scalar/MemCpyOptimizer.cpp test/Transforms/DeadStoreElimination/atomic.ll test/Transforms/GVN/atomic.ll test/Transforms/MemCpyOpt/atomic.ll

Eli Friedman eli.friedman at gmail.com
Wed Aug 17 15:22:24 PDT 2011


Author: efriedma
Date: Wed Aug 17 17:22:24 2011
New Revision: 137888

URL: http://llvm.org/viewvc/llvm-project?rev=137888&view=rev
Log:
Atomic load/store handling for the passes using memdep (GVN, DSE, memcpyopt).
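The common thread in all three files is replacing !isVolatile() checks with
the ordering-aware predicates isSimple() and isUnordered() on LoadInst and
StoreInst. As a rough sketch of the semantics (isSimpleLike/isUnorderedLike
are illustrative stand-ins here, not the actual LLVM API; see
include/llvm/Instructions.h for the real definitions):

  // Illustrative paraphrase only -- not the verbatim LLVM implementation.
  // A "simple" access is neither volatile nor atomic. An "unordered"
  // access may additionally be atomic, but only with the weakest
  // ("unordered") ordering, which forbids tearing but imposes no
  // ordering constraints on surrounding memory operations.
  static bool isSimpleLike(const LoadInst *LI) {
    return !LI->isVolatile() && !LI->isAtomic();
  }
  static bool isUnorderedLike(const LoadInst *LI) {
    return !LI->isVolatile() &&
           (!LI->isAtomic() || LI->getOrdering() == Unordered);
  }

Each pass then uses the weakest predicate that keeps it correct: DSE checks
isUnordered() when deleting a dead store (even an unordered atomic store's
value can be provably dead), while GVN and MemCpyOpt check isSimple() at the
points where they rewrite or widen the access itself.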


Added:
    llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll
    llvm/trunk/test/Transforms/GVN/atomic.ll
    llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/trunk/lib/Transforms/Scalar/GVN.cpp
    llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=137888&r1=137887&r2=137888&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Wed Aug 17 17:22:24 2011
@@ -213,9 +213,9 @@
 /// isRemovable - If the value of this instruction and the memory it writes to
 /// are unused, may we delete this instruction?
 static bool isRemovable(Instruction *I) {
-  // Don't remove volatile stores.
+  // Don't remove volatile/atomic stores.
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return !SI->isVolatile();
+    return SI->isUnordered();
   
   IntrinsicInst *II = cast<IntrinsicInst>(I);
   switch (II->getIntrinsicID()) {
@@ -447,7 +447,7 @@
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
         if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
-            SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+            SI->getOperand(0) == DepLoad && isRemovable(SI)) {
           DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
                        << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');
           
@@ -670,6 +670,8 @@
     
     // If we encounter a use of the pointer, it is no longer considered dead
     if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+      if (!L->isUnordered()) // Be conservative with atomic/volatile load
+        break;
       LoadedLoc = AA->getLocation(L);
     } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
       LoadedLoc = AA->getLocation(V);

Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=137888&r1=137887&r2=137888&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Wed Aug 17 17:22:24 2011
@@ -982,8 +982,8 @@
   unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
   unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
   if (Offset+LoadSize > SrcValSize) {
-    assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
-    assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
 // If we have a load/load clobber and DepLI can be widened to cover this
     // load, then we should widen it to the next power of 2 size big enough!
     unsigned NewLoadSize = Offset+LoadSize;
@@ -1669,7 +1669,7 @@
   if (!MD)
     return false;
 
-  if (L->isVolatile())
+  if (!L->isSimple())
     return false;
 
   if (L->use_empty()) {

Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=137888&r1=137887&r2=137888&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Wed Aug 17 17:22:24 2011
@@ -384,7 +384,7 @@
     
     if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
       // If this is a store, see if we can merge it in.
-      if (NextStore->isVolatile()) break;
+      if (!NextStore->isSimple()) break;
     
       // Check to see if this stored value is of the same byte-splattable value.
       if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
@@ -479,7 +479,7 @@
 
 
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
-  if (SI->isVolatile()) return false;
+  if (!SI->isSimple()) return false;
   
   if (TD == 0) return false;
 
@@ -487,7 +487,7 @@
   // happen to be using a load-store pair to implement it, rather than
   // a memcpy.
   if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
-    if (!LI->isVolatile() && LI->hasOneUse() &&
+    if (LI->isSimple() && LI->hasOneUse() &&
         LI->getParent() == SI->getParent()) {
       MemDepResult ldep = MD->getDependency(LI);
       CallInst *C = 0;

Added: llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll?rev=137888&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll Wed Aug 17 17:22:24 2011
@@ -0,0 +1,107 @@
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; Sanity tests for atomic stores and loads.
+; Note that essentially every transformation DSE performs is legal on
+; atomic ops; only transformations across them may be disallowed.
+
+ at x = common global i32 0, align 4
+ at y = common global i32 0, align 4
+
+declare void @randomop(i32*)
+
+; DSE across unordered store (allowed)
+define void @test1()  nounwind uwtable ssp {
+; CHECK: test1
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+entry:
+  store i32 0, i32* @x
+  store atomic i32 0, i32* @y unordered, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE across seq_cst load (allowed in theory; not implemented ATM)
+define i32 @test2()  nounwind uwtable ssp {
+; CHECK: test2
+; CHECK: store i32 0
+; CHECK: store i32 1
+entry:
+  store i32 0, i32* @x
+  %x = load atomic i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; DSE across seq_cst store (store before atomic store must not be removed)
+define void @test3()  nounwind uwtable ssp {
+; CHECK: test3
+; CHECK: store i32
+; CHECK: store atomic i32 2
+entry:
+  store i32 0, i32* @x
+  store atomic i32 2, i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE remove unordered store (allowed)
+define void @test4()  nounwind uwtable ssp {
+; CHECK: test4
+; CHECK-NOT: store atomic
+; CHECK: store i32 1
+entry:
+  store atomic i32 0, i32* @x unordered, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE unordered store overwriting non-atomic store (allowed)
+define void @test5()  nounwind uwtable ssp {
+; CHECK: test5
+; CHECK: store atomic i32 1
+entry:
+  store i32 0, i32* @x
+  store atomic i32 1, i32* @x unordered, align 4
+  ret void
+}
+
+; DSE no-op unordered atomic store (allowed)
+define void @test6()  nounwind uwtable ssp {
+; CHECK: test6
+; CHECK-NOT: store
+; CHECK: ret void
+entry:
+  %x = load atomic i32* @x unordered, align 4
+  store atomic i32 %x, i32* @x unordered, align 4
+  ret void
+}
+
+; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
+; to reason about atomic operations).
+define void @test7()  nounwind uwtable ssp {
+; CHECK: test7
+; CHECK: store atomic 
+entry:
+  %a = alloca i32
+  store atomic i32 0, i32* %a seq_cst, align 4
+  ret void
+}
+
+; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
+; to reason about atomic operations).
+define i32 @test8()  nounwind uwtable ssp {
+; CHECK: test8
+; CHECK: store
+; CHECK: load atomic 
+entry:
+  %a = alloca i32
+  call void @randomop(i32* %a)
+  store i32 0, i32* %a, align 4
+  %x = load atomic i32* @x seq_cst, align 4
+  ret i32 %x
+}
+

Added: llvm/trunk/test/Transforms/GVN/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/atomic.ll?rev=137888&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/GVN/atomic.ll (added)
+++ llvm/trunk/test/Transforms/GVN/atomic.ll Wed Aug 17 17:22:24 2011
@@ -0,0 +1,80 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+ at x = common global i32 0, align 4
+ at y = common global i32 0, align 4
+
+; GVN across unordered store (allowed)
+define i32 @test1() nounwind uwtable ssp {
+; CHECK: test1
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x unordered, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across seq_cst store (allowed in theory; not implemented ATM)
+define i32 @test2() nounwind uwtable ssp {
+; CHECK: test2
+; CHECK: add i32 %x, %y
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x seq_cst, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across unordered load (allowed)
+define i32 @test3() nounwind uwtable ssp {
+; CHECK: test3
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  %y = load atomic i32* @x unordered, align 4
+  %z = load i32* @y
+  %a = add i32 %x, %z
+  %b = add i32 %y, %a
+  ret i32 %b
+}
+
+; GVN across seq_cst load (load after the atomic load must not be removed)
+define i32 @test4() nounwind uwtable ssp {
+; CHECK: test4
+; CHECK: load atomic i32* @x
+; CHECK: load i32* @y
+entry:
+  %x = load i32* @y
+  %y = load atomic i32* @x seq_cst, align 4
+  %x2 = load i32* @y
+  %x3 = add i32 %x, %x2
+  %y2 = add i32 %y, %x3
+  ret i32 %y2
+}
+
+; GVN load to unordered load (allowed)
+define i32 @test5() nounwind uwtable ssp {
+; CHECK: test5
+; CHECK: add i32 %x, %x
+entry:
+  %x = load atomic i32* @x unordered, align 4
+  %y = load i32* @x
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN unordered load to load (unordered load must not be removed)
+define i32 @test6() nounwind uwtable ssp {
+; CHECK: test6
+; CHECK: load atomic i32* @x unordered
+entry:
+  %x = load i32* @x
+  %x2 = load atomic i32* @x unordered, align 4
+  %x3 = add i32 %x, %x2
+  ret i32 %x3
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll?rev=137888&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll Wed Aug 17 17:22:24 2011
@@ -0,0 +1,41 @@
+; RUN: opt -basicaa -memcpyopt -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+ at x = global i32 0
+
+declare void @otherf(i32*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; memcpyopt should not touch atomic ops
+define void @test1() nounwind uwtable ssp {
+; CHECK: test1
+; CHECK: store atomic
+  %x = alloca [101 x i32], align 16
+  %bc = bitcast [101 x i32]* %x to i8*
+  call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i32 16, i1 false)
+  %gep1 = getelementptr inbounds [101 x i32]* %x, i32 0, i32 100
+  store atomic i32 0, i32* %gep1 unordered, align 4
+  %gep2 = getelementptr inbounds [101 x i32]* %x, i32 0, i32 0
+  call void @otherf(i32* %gep2)
+  ret void
+}
+
+; memcpyopt across unordered store
+define void @test2() nounwind uwtable ssp {
+; CHECK: test2
+; CHECK: call
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: call
+  %old = alloca i32
+  %new = alloca i32
+  call void @otherf(i32* nocapture %old)
+  store atomic i32 0, i32* @x unordered, align 4
+  %v = load i32* %old
+  store i32 %v, i32* %new
+  call void @otherf(i32* nocapture %new)  
+  ret void
+}
+
