[llvm] r331991 - [DSE] Teach the pass about partial overwrite of atomic memory intrinsics
Daniel Neilson via llvm-commits
llvm-commits at lists.llvm.org
Thu May 10 08:12:49 PDT 2018
Author: dneilson
Date: Thu May 10 08:12:49 2018
New Revision: 331991
URL: http://llvm.org/viewvc/llvm-project?rev=331991&view=rev
Log:
[DSE] Teach the pass about partial overwrite of atomic memory intrinsics
Summary:
This change teaches DSE that the atomic memory intrinsics can be overwritten
partially in the same way as the non-atomic forms. Specifically, that the
atomic memcpy & memset can be shortened at the end and that the atomic memset
can be shortened at the beginning, if they are partially overwritten
by later stores.
Reviewers: mkazantsev, skatkov, apilipenko, efriedma, rsmith, spatel, filcab, sanjoy
Reviewed By: efriedma
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D45584
Modified:
llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=331991&r1=331990&r2=331991&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Thu May 10 08:12:49 2018
@@ -278,9 +278,10 @@ static bool isShortenableAtTheEnd(Instru
default: return false;
case Intrinsic::memset:
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ case Intrinsic::memset_element_unordered_atomic:
// Do shorten memory intrinsics.
// FIXME: Add memmove if it's also safe to transform.
- // TODO: Add atomic memcpy/memset
return true;
}
}
@@ -295,9 +296,7 @@ static bool isShortenableAtTheEnd(Instru
static bool isShortenableAtTheBeginning(Instruction *I) {
// FIXME: Handle only memset for now. Supporting memcpy/memmove should be
// easily done by offsetting the source address.
- // TODO: Handle atomic memory intrinsics
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
- return II && II->getIntrinsicID() == Intrinsic::memset;
+ return isa<AnyMemSetInst>(I);
}
/// Return the pointer that is being written to.
@@ -897,7 +896,7 @@ static bool tryToShorten(Instruction *Ea
// Power of 2 vector writes are probably always a bad idea to optimize
// as any store/memset/memcpy is likely using vector instructions so
// shortening it to not vector size is likely to be slower
- MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
+ auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment();
if (!IsOverwriteEnd)
LaterOffset = int64_t(LaterOffset + LaterSize);
@@ -906,15 +905,23 @@ static bool tryToShorten(Instruction *Ea
!((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
return false;
+ int64_t NewLength = IsOverwriteEnd
+ ? LaterOffset - EarlierOffset
+ : EarlierSize - (LaterOffset - EarlierOffset);
+
+ if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
+ // When shortening an atomic memory intrinsic, the newly shortened
+ // length must remain an integer multiple of the element size.
+ const uint32_t ElementSize = AMI->getElementSizeInBytes();
+ if (0 != NewLength % ElementSize)
+ return false;
+ }
+
DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
<< (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
<< "\n KILLER (offset " << LaterOffset << ", " << EarlierSize
<< ")\n");
- int64_t NewLength = IsOverwriteEnd
- ? LaterOffset - EarlierOffset
- : EarlierSize - (LaterOffset - EarlierOffset);
-
Value *EarlierWriteLength = EarlierIntrinsic->getLength();
Value *TrimmedLength =
ConstantInt::get(EarlierWriteLength->getType(), NewLength);
Modified: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll?rev=331991&r1=331990&r2=331991&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll Thu May 10 08:12:49 2018
@@ -26,7 +26,8 @@ define void @write4to7_atomic(i32* nocap
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT: ret void
@@ -60,7 +61,8 @@ define void @write0to3_atomic(i32* nocap
; CHECK-LABEL: @write0to3_atomic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT: store atomic i32 1, i32* [[P]] unordered, align 4
; CHECK-NEXT: ret void
;
@@ -76,7 +78,8 @@ define void @write0to3_atomic_weaker(i32
; CHECK-LABEL: @write0to3_atomic_weaker(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT: store i32 1, i32* [[P]], align 4
; CHECK-NEXT: ret void
;
@@ -111,7 +114,8 @@ define void @write0to7_atomic(i32* nocap
; CHECK-LABEL: @write0to7_atomic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 8
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i64*
; CHECK-NEXT: store atomic i64 1, i64* [[P4]] unordered, align 8
; CHECK-NEXT: ret void
@@ -149,7 +153,8 @@ define void @write0to7_2_atomic(i32* noc
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i64*
; CHECK-NEXT: store atomic i64 1, i64* [[P4]] unordered, align 8
; CHECK-NEXT: ret void
@@ -307,7 +312,8 @@ define void @write8To15AndThen0To7_atomi
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
-; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
+; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT: [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
; CHECK-NEXT: store atomic i64 1, i64* [[BASE64_1]] unordered, align 8
@@ -333,7 +339,8 @@ define void @write8To15AndThen0To7_atomi
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
-; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
+; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT: [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
; CHECK-NEXT: store atomic i64 1, i64* [[BASE64_1]] unordered, align 8
@@ -359,7 +366,8 @@ define void @write8To15AndThen0To7_atomi
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
-; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
+; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT: [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
; CHECK-NEXT: store i64 1, i64* [[BASE64_1]], align 8
Modified: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll?rev=331991&r1=331990&r2=331991&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll Thu May 10 08:12:49 2018
@@ -32,7 +32,7 @@ define void @write24to28_atomic(i32* noc
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 24, i32 4)
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
; CHECK-NEXT: store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT: ret void
@@ -52,7 +52,7 @@ define void @write24to28_atomic_weaker(i
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 24, i32 4)
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: ret void
@@ -87,7 +87,7 @@ define void @write28to32_atomic(i32* noc
; CHECK-LABEL: @write28to32_atomic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
; CHECK-NEXT: store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT: ret void
@@ -155,7 +155,7 @@ define void @write32to36_atomic(%struct.
; CHECK-LABEL: @write32to36_atomic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8*
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 4)
; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2
; CHECK-NEXT: store atomic i32 1, i32* [[C]] unordered, align 4
; CHECK-NEXT: ret void
@@ -173,7 +173,7 @@ define void @write32to36_atomic_weaker(%
; CHECK-LABEL: @write32to36_atomic_weaker(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8*
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 4)
; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2
; CHECK-NEXT: store i32 1, i32* [[C]], align 4
; CHECK-NEXT: ret void
@@ -207,7 +207,7 @@ define void @write16to32_atomic(%struct.
; CHECK-LABEL: @write16to32_atomic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8*
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 4)
; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 1
; CHECK-NEXT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* [[C]], align 4
; CHECK-NEXT: ret void
@@ -316,7 +316,7 @@ define void @write16To23AndThen24To31_at
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
-; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8)
; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
; CHECK-NEXT: store atomic i64 3, i64* [[BASE64_2]] unordered, align 8
@@ -342,7 +342,7 @@ define void @write16To23AndThen24To31_at
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
-; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8)
; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
; CHECK-NEXT: store i64 3, i64* [[BASE64_2]], align 8
@@ -368,7 +368,7 @@ define void @write16To23AndThen24To31_at
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
-; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8)
; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
; CHECK-NEXT: store atomic i64 3, i64* [[BASE64_2]] unordered, align 8
More information about the llvm-commits
mailing list