[llvm] r330629 - [DSE] Teach the pass that atomic memory intrinsics are stores.

Daniel Neilson via llvm-commits <llvm-commits at lists.llvm.org>
Mon Apr 23 12:06:49 PDT 2018


Author: dneilson
Date: Mon Apr 23 12:06:49 2018
New Revision: 330629

URL: http://llvm.org/viewvc/llvm-project?rev=330629&view=rev
Log:
[DSE] Teach the pass that atomic memory intrinsics are stores.

Summary:
This change teaches DSE that the atomic memory intrinsics are stores
that can be eliminated, and that can allow other stores to be
eliminated. It specifically does not teach DSE that these intrinsics
can be partially eliminated (i.e., their length reduced and their
dest/src changed); that will be handled in another change.
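
For example, with this change DSE can delete the first of two identical
atomic memcpys (a minimal sketch, following test15_atomic in the test
diff below):

  ; Both calls write the same 12 bytes of %P, so the first is dead.
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  ; After DSE, only the second call remains.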

Reviewers: mkazantsev, skatkov, apilipenko, efriedma, rsmith

Reviewed By: efriedma

Subscribers: dmgreen, llvm-commits

Differential Revision: https://reviews.llvm.org/D45535

Modified:
    llvm/trunk/include/llvm/Analysis/MemoryLocation.h
    llvm/trunk/lib/Analysis/MemoryLocation.cpp
    llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll
    llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll

Modified: llvm/trunk/include/llvm/Analysis/MemoryLocation.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/MemoryLocation.h?rev=330629&r1=330628&r2=330629&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/MemoryLocation.h (original)
+++ llvm/trunk/include/llvm/Analysis/MemoryLocation.h Mon Apr 23 12:06:49 2018
@@ -27,6 +27,10 @@ class LoadInst;
 class StoreInst;
 class MemTransferInst;
 class MemIntrinsic;
+class AtomicMemTransferInst;
+class AtomicMemIntrinsic;
+class AnyMemTransferInst;
+class AnyMemIntrinsic;
 class TargetLibraryInfo;
 
 /// Representation for a specific memory location.
@@ -90,10 +94,14 @@ public:
 
   /// Return a location representing the source of a memory transfer.
   static MemoryLocation getForSource(const MemTransferInst *MTI);
+  static MemoryLocation getForSource(const AtomicMemTransferInst *MTI);
+  static MemoryLocation getForSource(const AnyMemTransferInst *MTI);
 
   /// Return a location representing the destination of a memory set or
   /// transfer.
   static MemoryLocation getForDest(const MemIntrinsic *MI);
+  static MemoryLocation getForDest(const AtomicMemIntrinsic *MI);
+  static MemoryLocation getForDest(const AnyMemIntrinsic *MI);
 
   /// Return a location representing a particular argument of a call.
   static MemoryLocation getForArgument(ImmutableCallSite CS, unsigned ArgIdx,

Modified: llvm/trunk/lib/Analysis/MemoryLocation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MemoryLocation.cpp?rev=330629&r1=330628&r2=330629&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/MemoryLocation.cpp (original)
+++ llvm/trunk/lib/Analysis/MemoryLocation.cpp Mon Apr 23 12:06:49 2018
@@ -65,6 +65,14 @@ MemoryLocation MemoryLocation::get(const
 }
 
 MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
+  return getForSource(cast<AnyMemTransferInst>(MTI));
+}
+
+MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
+  return getForSource(cast<AnyMemTransferInst>(MTI));
+}
+
+MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
   uint64_t Size = UnknownSize;
   if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
     Size = C->getValue().getZExtValue();
@@ -77,17 +85,25 @@ MemoryLocation MemoryLocation::getForSou
   return MemoryLocation(MTI->getRawSource(), Size, AATags);
 }
 
-MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) {
+MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
+  return getForDest(cast<AnyMemIntrinsic>(MI));
+}
+
+MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
+  return getForDest(cast<AnyMemIntrinsic>(MI));
+}
+
+MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
   uint64_t Size = UnknownSize;
-  if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+  if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
     Size = C->getValue().getZExtValue();
 
   // memcpy/memmove can have AA tags. For memcpy, they apply
   // to both the source and the destination.
   AAMDNodes AATags;
-  MTI->getAAMetadata(AATags);
+  MI->getAAMetadata(AATags);
 
-  return MemoryLocation(MTI->getRawDest(), Size, AATags);
+  return MemoryLocation(MI->getRawDest(), Size, AATags);
 }
 
 MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
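
As a minimal illustration of the new overloads (assuming Inst is an
arbitrary Instruction* in some pass), a caller can now resolve the
written location for plain and atomic memory intrinsics uniformly:

  // Illustrative sketch: AnyMemIntrinsic covers memset/memcpy/memmove
  // in both their plain and element-unordered-atomic forms.
  if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
    MemoryLocation DestLoc = MemoryLocation::getForDest(MI);
    // ... hand DestLoc to alias analysis, etc. ...
  }

This is exactly the pattern the DeadStoreElimination.cpp hunk below
adopts in getLocForWrite.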

Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=330629&r1=330628&r2=330629&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Mon Apr 23 12:06:49 2018
@@ -160,6 +160,9 @@ static bool hasAnalyzableMemoryWrite(Ins
     case Intrinsic::memset:
     case Intrinsic::memmove:
     case Intrinsic::memcpy:
+    case Intrinsic::memcpy_element_unordered_atomic:
+    case Intrinsic::memmove_element_unordered_atomic:
+    case Intrinsic::memset_element_unordered_atomic:
     case Intrinsic::init_trampoline:
     case Intrinsic::lifetime_end:
       return true;
@@ -189,7 +192,7 @@ static MemoryLocation getLocForWrite(Ins
   if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
     return MemoryLocation::get(SI);
 
-  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+  if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
     // memcpy/memmove/memset.
     MemoryLocation Loc = MemoryLocation::getForDest(MI);
     return Loc;
@@ -222,7 +225,7 @@ static MemoryLocation getLocForRead(Inst
 
   // The only instructions that both read and write are the mem transfer
   // instructions (memcpy/memmove).
-  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+  if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst))
     return MemoryLocation::getForSource(MTI);
   return MemoryLocation();
 }
@@ -249,6 +252,10 @@ static bool isRemovable(Instruction *I)
     case Intrinsic::memcpy:
       // Don't remove volatile memory intrinsics.
       return !cast<MemIntrinsic>(II)->isVolatile();
+    case Intrinsic::memcpy_element_unordered_atomic:
+    case Intrinsic::memmove_element_unordered_atomic:
+    case Intrinsic::memset_element_unordered_atomic:
+      return true;
     }
   }
 
@@ -273,6 +280,7 @@ static bool isShortenableAtTheEnd(Instru
       case Intrinsic::memcpy:
         // Do shorten memory intrinsics.
         // FIXME: Add memmove if it's also safe to transform.
+        // TODO: Add atomic memcpy/memset
         return true;
     }
   }
@@ -287,6 +295,7 @@ static bool isShortenableAtTheEnd(Instru
 static bool isShortenableAtTheBeginning(Instruction *I) {
   // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
   // easily done by offsetting the source address.
+  // TODO: Handle atomic memory intrinsics
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
   return II && II->getIntrinsicID() == Intrinsic::memset;
 }
@@ -534,7 +543,7 @@ static bool isPossibleSelfRead(Instructi
   if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
     return false;
 
-  if (isa<MemCpyInst>(Inst)) {
+  if (isa<AnyMemCpyInst>(Inst)) {
     // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
     // but in practice memcpy(A <- B) either means that A and B are disjoint or
     // are equal (i.e. there are not partial overlaps).  Given that, if we have:
@@ -856,8 +865,6 @@ static bool handleEndBlock(BasicBlock &B
       LoadedLoc = MemoryLocation::get(L);
     } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
       LoadedLoc = MemoryLocation::get(V);
-    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
-      LoadedLoc = MemoryLocation::getForSource(MTI);
     } else if (!BBI->mayReadFromMemory()) {
       // Instruction doesn't read memory.  Note that stores that weren't removed
       // above will hit this case.

Modified: llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll?rev=330629&r1=330628&r2=330629&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll Mon Apr 23 12:06:49 2018
@@ -54,10 +54,6 @@ declare void @llvm.memset.element.unorde
 
 define void @test4() {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    [[B:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    store atomic i16 0, i16* [[B]] unordered, align 2
-; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
 ; CHECK-NEXT:    ret void
 ;
   %A = alloca i16, i16 1024, align 2
@@ -73,10 +69,6 @@ define void @test4() {
 
 define void @test5() {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    [[B:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    store atomic i16 0, i16* [[B]] unordered, align 2
-; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
 ; CHECK-NEXT:    ret void
 ;
   %A = alloca i16, i16 1024, align 2
@@ -92,8 +84,6 @@ define void @test5() {
 
 define void @test6() {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* align 2 [[A]], i8 0, i16 1024, i32 2)
 ; CHECK-NEXT:    ret void
 ;
   %A = alloca i16, i16 1024, align 2

Modified: llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll?rev=330629&r1=330628&r2=330629&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll Mon Apr 23 12:06:49 2018
@@ -92,9 +92,8 @@ define void @test6(i32 *%p, i8 *%q) {
 ; alias).
 define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
 ; CHECK-LABEL: @test6_atomic(
-; CHECK-NEXT:    store atomic i32 10, i32* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
-; CHECK-NEXT:    store atomic i32 30, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret void
 ;
   store atomic i32 10, i32* %p unordered, align 4       ;; dead.
@@ -121,9 +120,8 @@ define void @test7(i32 *%p, i8 *%q, i8*
 ; alias).
 define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
 ; CHECK-LABEL: @test7_atomic(
-; CHECK-NEXT:    store atomic i32 10, i32* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
-; CHECK-NEXT:    store atomic i32 30, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret void
 ;
   store atomic i32 10, i32* %p unordered, align 4       ;; dead.
@@ -292,7 +290,6 @@ define void @test15(i8* %P, i8* %Q) noun
 define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test15_atomic(
 ; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
@@ -300,11 +297,10 @@ define void @test15_atomic(i8* %P, i8* %
   ret void
 }
 
-; It would only be valid to remove the non-atomic memcpy
+;; Fully dead overwrite of memcpy.
 define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test15_atomic_weaker(
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
@@ -312,11 +308,10 @@ define void @test15_atomic_weaker(i8* %P
   ret void
 }
 
-; It would only be valid to remove the non-atomic memcpy
+;; Fully dead overwrite of memcpy.
 define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test15_atomic_weaker_2(
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
@@ -338,8 +333,7 @@ define void @test16(i8* %P, i8* %Q) noun
 ;; Full overwrite of smaller memcpy.
 define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test16_atomic(
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
@@ -350,8 +344,7 @@ define void @test16_atomic(i8* %P, i8* %
 ;; Full overwrite of smaller memory where overwrite has stronger atomicity
 define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test16_atomic_weaker(
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i1 false)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false)
@@ -362,8 +355,7 @@ define void @test16_atomic_weaker(i8* %P
 ;; Full overwrite of smaller memory where overwrite has weaker atomicity.
 define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test16_atomic_weaker_2(
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
@@ -385,8 +377,7 @@ define void @test17(i8* %P, i8* noalias
 ;; Overwrite of memset by memcpy.
 define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp {
 ; CHECK-LABEL: @test17_atomic(
-; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
@@ -398,8 +389,7 @@ define void @test17_atomic(i8* %P, i8* n
 ;; remove the memset.
 define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp {
 ; CHECK-LABEL: @test17_atomic_weaker(
-; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i1 false)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false)
@@ -411,8 +401,7 @@ define void @test17_atomic_weaker(i8* %P
 ;; the memset.
 define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
 ; CHECK-LABEL: @test17_atomic_weaker_2(
-; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)



