[llvm] b5e208f - [DSE] Support looking through memory phis at end of function.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 30 05:28:23 PDT 2022
Author: Florian Hahn
Date: 2022-08-30T13:27:51+01:00
New Revision: b5e208fcbaa6c76a33a96e230e9df2121bed2a10
URL: https://github.com/llvm/llvm-project/commit/b5e208fcbaa6c76a33a96e230e9df2121bed2a10
DIFF: https://github.com/llvm/llvm-project/commit/b5e208fcbaa6c76a33a96e230e9df2121bed2a10.diff
LOG: [DSE] Support looking through memory phis at end of function.
Update isWriteAtEndOfFunction to look through MemoryPhis. The reason
MemoryPhis were skipped so far was the known AliasAnalysis issue with it
missing loop-carried dependences.
This problem is already addressed in other parts of the code by skipping
MemoryDefs that may be in different loops. I think the same logic can
be applied here.
This can have a substantial impact on the number of stores removed in
some cases. For MultiSource/SPEC2006/SPEC2017 with -O3:
```
Metric: dse.NumFastStores
Program dse.NumFastStores
base patch diff
External/S...CINT2017rate/557.xz_r/557.xz_r 14.00 45.00 221.4%
External/S...te/538.imagick_r/538.imagick_r 439.00 1267.00 188.6%
MultiSourc...e/Applications/SIBsim4/SIBsim4 6.00 15.00 150.0%
MultiSourc...Prolangs-C/simulator/simulator 3.00 7.00 133.3%
MultiSource/Applications/siod/siod 3.00 7.00 133.3%
MultiSourc...arks/FreeBench/distray/distray 6.00 9.00 50.0%
MultiSourc...e/Applications/obsequi/Obsequi 22.00 30.00 36.4%
MultiSource/Benchmarks/Ptrdist/bc/bc 23.00 28.00 21.7%
External/S...NT2017rate/502.gcc_r/502.gcc_r 1258.00 1512.00 20.2%
External/S...te/520.omnetpp_r/520.omnetpp_r 954.00 1143.00 19.8%
External/S...rate/510.parest_r/510.parest_r 5961.00 7122.00 19.5%
External/S...C/CINT2006/445.gobmk/445.gobmk 47.00 56.00 19.1%
External/S...00.perlbench_r/500.perlbench_r 241.00 286.00 18.7%
External/S...NT2006/471.omnetpp/471.omnetpp 36.00 42.00 16.7%
External/S...06/400.perlbench/400.perlbench 183.00 210.00 14.8%
MultiSource/Applications/SPASS/SPASS 72.00 81.00 12.5%
External/S...17rate/541.leela_r/541.leela_r 72.00 80.00 11.1%
External/SPEC/CINT2006/403.gcc/403.gcc 585.00 642.00 9.7%
MultiSourc...e/Applications/sqlite3/sqlite3 120.00 131.00 9.2%
MultiSourc...Applications/hexxagon/hexxagon 11.00 12.00 9.1%
External/S.../CFP2006/453.povray/453.povray 566.00 615.00 8.7%
External/S...rate/511.povray_r/511.povray_r 578.00 627.00 8.5%
External/S...FP2006/482.sphinx3/482.sphinx3 12.00 13.00 8.3%
MultiSource/Applications/oggenc/oggenc 130.00 140.00 7.7%
MultiSourc...e/Applications/ClamAV/clamscan 250.00 268.00 7.2%
MultiSourc.../mediabench/jpeg/jpeg-6a/cjpeg 19.00 20.00 5.3%
MultiSourc...ch/consumer-jpeg/consumer-jpeg 19.00 20.00 5.3%
External/S...te/526.blender_r/526.blender_r 3747.00 3928.00 4.8%
MultiSourc...OE-ProxyApps-C++/miniFE/miniFE 104.00 108.00 3.8%
MultiSourc...ch/consumer-lame/consumer-lame 54.00 56.00 3.7%
MultiSource/Benchmarks/Bullet/bullet 1222.00 1264.00 3.4%
MultiSourc...nchmarks/tramp3d-v4/tramp3d-v4 973.00 1005.00 3.3%
External/S.../CFP2006/447.dealII/447.dealII 2699.00 2780.00 3.0%
External/S...06/483.xalancbmk/483.xalancbmk 788.00 810.00 2.8%
External/S.../CFP2006/450.soplex/450.soplex 180.00 185.00 2.8%
MultiSourc.../DOE-ProxyApps-C++/CLAMR/CLAMR 338.00 345.00 2.1%
MultiSourc...Benchmarks/7zip/7zip-benchmark 685.00 699.00 2.0%
External/S...FP2017rate/544.nab_r/544.nab_r 158.00 160.00 1.3%
MultiSourc...sumer-typeset/consumer-typeset 772.00 781.00 1.2%
External/S...2017rate/525.x264_r/525.x264_r 410.00 414.00 1.0%
External/S...23.xalancbmk_r/523.xalancbmk_r 998.00 1002.00 0.4%
```
Compile-time is almost neutral:
https://llvm-compile-time-tracker.com/compare.php?from=b3125ad3d60531a97eea20009cc9629a87755862&to=84007eee59004f43464eda7f5ba8263ed5158df8&stat=instructions
NewPM-O3: +0.03%
NewPM-ReleaseThinLTO: -0.01%
NewPM-ReleaseLTO-g: +0.03%
Reviewed By: asbirlea
Differential Revision: https://reviews.llvm.org/D132365
Added:
Modified:
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll
llvm/test/Transforms/DeadStoreElimination/phi-translation.ll
llvm/test/Transforms/MemCpyOpt/memcpy.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c1e3e6031ec98..45c404a140af3 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1075,13 +1075,16 @@ struct DSEState {
}
MemoryAccess *UseAccess = WorkList[I];
- // Simply adding the users of MemoryPhi to the worklist is not enough,
- // because we might miss read clobbers in different iterations of a loop,
- // for example.
- // TODO: Add support for phi translation to handle the loop case.
- if (isa<MemoryPhi>(UseAccess))
- return false;
+ if (isa<MemoryPhi>(UseAccess)) {
+ // AliasAnalysis does not account for loops. Limit elimination to
+ // candidates for which we can guarantee they always store to the same
+ // memory location.
+ if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
+ return false;
+ PushMemUses(cast<MemoryPhi>(UseAccess));
+ continue;
+ }
// TODO: Checking for aliasing is expensive. Consider reducing the amount
// of times this is called and/or caching it.
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll
index 8615cf9e46d98..d0d13228ac5a2 100644
--- a/llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-memintrinsics.ll
@@ -138,8 +138,6 @@ define void @alloca_1(i1 %c) {
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: ret void
@@ -177,12 +175,8 @@ define void @alloca_2(i1 %c) {
; CHECK-NEXT: call void @readonly_use(i32* [[P]])
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll b/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll
index 9f764bdb0d0b1..158e7658aa4b4 100644
--- a/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/phi-translation.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -dse -S %s | FileCheck %s
-; TODO: Both the stores in %then and %else can be eliminated by translating %p
+; Both the stores in %then and %else can be eliminated by translating %p
; through the phi.
define void @memoryphi_translate_1(i1 %c) {
; CHECK-LABEL: @memoryphi_translate_1(
@@ -10,10 +10,8 @@ define void @memoryphi_translate_1(i1 %c) {
; CHECK-NEXT: [[A_2:%.*]] = alloca i8, align 1
; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
-; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1
; CHECK-NEXT: br label [[END:%.*]]
; CHECK: else:
-; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[A_1]], [[THEN]] ], [ [[A_2]], [[ELSE]] ]
@@ -39,7 +37,7 @@ end:
ret void
}
-; TODO: The store in %else can be eliminated by translating %p through the phi.
+; The store in %else can be eliminated by translating %p through the phi.
; The store in %then cannot be eliminated, because %a.1 is read before the final
; store.
define i8 @memoryphi_translate_2(i1 %c) {
@@ -52,7 +50,6 @@ define i8 @memoryphi_translate_2(i1 %c) {
; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1
; CHECK-NEXT: br label [[END:%.*]]
; CHECK: else:
-; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[A_1]], [[THEN]] ], [ [[A_2]], [[ELSE]] ]
@@ -80,7 +77,7 @@ end:
ret i8 %l
}
-; TODO: The store in %then can be eliminated by translating %p through the phi.
+; The store in %then can be eliminated by translating %p through the phi.
; The store in %else cannot be eliminated, because %a.2 is read before the final
; store.
define i8 @memoryphi_translate_3(i1 %c) {
@@ -90,7 +87,6 @@ define i8 @memoryphi_translate_3(i1 %c) {
; CHECK-NEXT: [[A_2:%.*]] = alloca i8, align 1
; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
-; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1
; CHECK-NEXT: br label [[END:%.*]]
; CHECK: else:
; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1
@@ -166,11 +162,9 @@ define void @memoryphi_translate_5(i1 %cond) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[B:%.*]] = alloca i8, align 1
-; CHECK-NEXT: [[C:%.*]] = alloca i8, align 1
; CHECK-NEXT: store i8 0, i8* [[A]], align 1
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
; CHECK: cond.true:
-; CHECK-NEXT: store i8 0, i8* [[C]], align 1
; CHECK-NEXT: br label [[COND_END]]
; CHECK: cond.end:
; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[B]], [[COND_TRUE]] ], [ [[A]], [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 9b97e36078e1f..8ba7772b31782 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -225,12 +225,9 @@ define i8 @test4_read_between(i8 *%P) {
define void @test4_non_local(i8 *%P, i1 %c) {
; CHECK-LABEL: @test4_non_local(
-; CHECK-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
-; CHECK-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false)
; CHECK-NEXT: br i1 [[C:%.*]], label [[CALL:%.*]], label [[EXIT:%.*]]
; CHECK: call:
-; CHECK-NEXT: call void @test4a(i8* byval(i8) align 1 [[P]])
+; CHECK-NEXT: call void @test4a(i8* byval(i8) align 1 [[P:%.*]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
More information about the llvm-commits
mailing list