[llvm] r325641 - [DSE] Don't DSE stores that subsequent memmove calls read from
Sanjoy Das via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 20 15:19:34 PST 2018
Author: sanjoy
Date: Tue Feb 20 15:19:34 2018
New Revision: 325641
URL: http://llvm.org/viewvc/llvm-project?rev=325641&view=rev
Log:
[DSE] Don't DSE stores that subsequent memmove calls read from
Summary:
We used to remove the first memmove in cases like this:
memmove(p, p+2, 8);
memmove(p, p+2, 8);
which is incorrect. Fix this by changing isPossibleSelfRead to what was most
likely the intended behavior.
Historical note: the buggy code was added in https://reviews.llvm.org/rL120974
to address PR8728.
Reviewers: rsmith
Subscribers: mcrosier, llvm-commits, jlebar
Differential Revision: https://reviews.llvm.org/D43425
Modified:
llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=325641&r1=325640&r2=325641&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Tue Feb 20 15:19:34 2018
@@ -510,8 +510,8 @@ static OverwriteResult isOverwrite(const
/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense. Consider this case:
///
-/// memcpy(A <- B)
-/// memcpy(A <- A)
+/// memmove(A <- B)
+/// memmove(A <- A)
///
/// In this case, the second store to A does not make the first store to A dead.
/// The usual situation isn't an explicit A<-A store like this (which can be
@@ -527,23 +527,34 @@ static bool isPossibleSelfRead(Instructi
// Self reads can only happen for instructions that read memory. Get the
// location read.
MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
- if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
+ if (!InstReadLoc.Ptr)
+ return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
- if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
+ if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
+ return false;
- // Okay, 'Inst' may copy over itself. However, we can still remove a the
- // DepWrite instruction if we can prove that it reads from the same location
- // as Inst. This handles useful cases like:
- // memcpy(A <- B)
- // memcpy(A <- B)
- // Here we don't know if A/B may alias, but we do know that B/B are must
- // aliases, so removing the first memcpy is safe (assuming it writes <= #
- // bytes as the second one.
- MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
+ if (isa<MemCpyInst>(Inst)) {
+ // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
+ // but in practice memcpy(A <- B) either means that A and B are disjoint or
+ // are equal (i.e. there are not partial overlaps). Given that, if we have:
+ //
+ // memcpy/memmove(A <- B) // DepWrite
+ // memcpy(A <- B) // Inst
+ //
+ // with Inst reading/writing a >= size than DepWrite, we can reason as
+ // follows:
+ //
+ // - If A == B then both the copies are no-ops, so the DepWrite can be
+ // removed.
+ // - If A != B then A and B are disjoint locations in Inst. Since
+ // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too.
+ // Therefore DepWrite can be removed.
+ MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
- if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
- return false;
+ if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ return false;
+ }
// If DepWrite doesn't read memory or if we can't prove it is a must alias,
// then it can't be considered dead.
Modified: llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll?rev=325641&r1=325640&r2=325641&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll Tue Feb 20 15:19:34 2018
@@ -252,6 +252,9 @@ define void @test17v(i8* %P, i8* %Q) nou
; Do not delete instruction where possible situation is:
; A = B
; A = A
+;
+; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be
+; equal (but not inequal and overlapping).
define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
@@ -521,3 +524,51 @@ define void @test35(i32* noalias %p) {
store i32 0, i32* %p
ret void
}
+
+; We cannot optimize away the first memmove since %P could overlap with %Q.
+define void @test36(i8* %P, i8* %Q) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ ret void
+}
+
+define void @test37(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+ ret void
+}
+
+; Same caveat about memcpy as in @test18 applies here.
+define void @test38(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+ ret void
+}
+
+define void @test39(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
+; CHECK-NEXT: ret
+
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
More information about the llvm-commits
mailing list