[llvm] c9ad356 - [DSE] Use optimized access if available for redundant store elimination.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 30 07:40:55 PST 2021
Author: Florian Hahn
Date: 2021-11-30T15:40:14Z
New Revision: c9ad356266f3c91d90bcb149d178423cb3e04c42
URL: https://github.com/llvm/llvm-project/commit/c9ad356266f3c91d90bcb149d178423cb3e04c42
DIFF: https://github.com/llvm/llvm-project/commit/c9ad356266f3c91d90bcb149d178423cb3e04c42.diff
LOG: [DSE] Use optimized access if available for redundant store elimination.
Using the optimized access enables additional optimizations in cases
where the defining access is a non-aliasing store.
Alternatively, we could walk upwards and skip non-aliasing defs
here, but my experiments so far showed that this would noticeably
increase compile-time for little extra gain compared to just using the
optimized access.
Improvements of dse.NumRedundantStores on MultiSource/CINT2006/CFP2006
on X86 with -O3:
test-suite...-typeset/consumer-typeset.test 1.00 76.00 7500.0%
test-suite.../Benchmarks/Bullet/bullet.test 3.00 12.00 300.0%
test-suite...006/453.povray/453.povray.test 3.00 6.00 100.0%
test-suite...telecomm-gsm/telecomm-gsm.test 1.00 2.00 100.0%
test-suite...ediabench/gsm/toast/toast.test 1.00 2.00 100.0%
test-suite...marks/7zip/7zip-benchmark.test 1.00 2.00 100.0%
test-suite...ications/JM/lencod/lencod.test 7.00 10.00 42.9%
test-suite...6/464.h264ref/464.h264ref.test 6.00 8.00 33.3%
test-suite...ications/JM/ldecod/ldecod.test 6.00 7.00 16.7%
test-suite...006/447.dealII/447.dealII.test 33.00 33.00 0.0%
test-suite...6/471.omnetpp/471.omnetpp.test NaN 1.00 nan%
test-suite...006/450.soplex/450.soplex.test NaN 2.00 nan%
test-suite.../CINT2006/403.gcc/403.gcc.test NaN 7.00 nan%
test-suite...lications/ClamAV/clamscan.test NaN 1.00 nan%
test-suite...CI_Purple/SMG2000/smg2000.test NaN 3.00 nan%
Follow-up to D111727.
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D112315
Added:
Modified:
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e7c14eb5b7472..3646d3f3017d6 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1928,7 +1928,14 @@ struct DSEState {
if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
!isRemovable(Def->getMemoryInst()))
continue;
- auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+ MemoryDef *UpperDef;
+ // To conserve compile-time, we avoid walking to the next clobbering def.
+ // Instead, we just try to get the optimized access, if it exists. DSE
+ // will try to optimize defs during the earlier traversal.
+ if (Def->isOptimized())
+ UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
+ else
+ UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
continue;
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
index 127c027924210..7faf06c53217d 100644
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -basic-aa -dse -S %s | FileCheck %s
+; RUN: opt -basic-aa -dse -dse-optimize-memoryssa=false -S %s | FileCheck --check-prefixes=CHECK,UNOPT %s
+; RUN: opt -basic-aa -dse -dse-optimize-memoryssa -S %s | FileCheck --check-prefixes=CHECK,OPT %s
+; RUN: opt -basic-aa -dse -S %s | FileCheck --check-prefixes=CHECK,UNOPT %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -315,17 +317,28 @@ bb3:
; The store in bb3 can be eliminated, because the store in bb1 cannot alias it.
define void @test10(i32* noalias %P, i32* %Q, i1 %c) {
-; CHECK-LABEL: @test10(
-; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
-; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: store i32 10, i32* [[Q:%.*]], align 4
-; CHECK-NEXT: br label [[BB3:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: ret void
-; CHECK: bb3:
-; CHECK-NEXT: store i32 0, i32* [[P]], align 4
-; CHECK-NEXT: ret void
+; UNOPT-LABEL: @test10(
+; UNOPT-NEXT: store i32 0, i32* [[P:%.*]], align 4
+; UNOPT-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; UNOPT: bb1:
+; UNOPT-NEXT: store i32 10, i32* [[Q:%.*]], align 4
+; UNOPT-NEXT: br label [[BB3:%.*]]
+; UNOPT: bb2:
+; UNOPT-NEXT: ret void
+; UNOPT: bb3:
+; UNOPT-NEXT: store i32 0, i32* [[P]], align 4
+; UNOPT-NEXT: ret void
+;
+; OPT-LABEL: @test10(
+; OPT-NEXT: store i32 0, i32* [[P:%.*]], align 4
+; OPT-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OPT: bb1:
+; OPT-NEXT: store i32 10, i32* [[Q:%.*]], align 4
+; OPT-NEXT: br label [[BB3:%.*]]
+; OPT: bb2:
+; OPT-NEXT: ret void
+; OPT: bb3:
+; OPT-NEXT: ret void
;
store i32 0, i32* %P
br i1 %c, label %bb1, label %bb2
@@ -412,13 +425,19 @@ define void @test12_memset_simple(i8* %ptr) {
}
define void @test12_memset_other_store_in_between(i8* %ptr) {
-; CHECK-LABEL: @test12_memset_other_store_in_between(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
-; CHECK-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
-; CHECK-NEXT: store i8 8, i8* [[PTR_4]], align 1
-; CHECK-NEXT: [[PTR_5:%.*]] = getelementptr i8, i8* [[PTR]], i64 5
-; CHECK-NEXT: store i8 0, i8* [[PTR_5]], align 1
-; CHECK-NEXT: ret void
+; UNOPT-LABEL: @test12_memset_other_store_in_between(
+; UNOPT-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
+; UNOPT-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
+; UNOPT-NEXT: store i8 8, i8* [[PTR_4]], align 1
+; UNOPT-NEXT: [[PTR_5:%.*]] = getelementptr i8, i8* [[PTR]], i64 5
+; UNOPT-NEXT: store i8 0, i8* [[PTR_5]], align 1
+; UNOPT-NEXT: ret void
+;
+; OPT-LABEL: @test12_memset_other_store_in_between(
+; OPT-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
+; OPT-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
+; OPT-NEXT: store i8 8, i8* [[PTR_4]], align 1
+; OPT-NEXT: ret void
;
call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 10, i1 false)
%ptr.4 = getelementptr i8, i8* %ptr, i64 4
@@ -525,8 +544,8 @@ declare i8* @strcat(i8*, i8*) nounwind argmemonly
define void @test14_strcat(i8* noalias %P, i8* noalias %Q) {
; CHECK-LABEL: @test14_strcat(
-; CHECK-NEXT: call i8* @strcat(i8* [[P:%.*]], i8* [[Q:%.*]])
-; CHECK-NEXT: call i8* @strcat(i8* [[P]], i8* [[Q]])
+; CHECK-NEXT: [[CALL1:%.*]] = call i8* @strcat(i8* [[P:%.*]], i8* [[Q:%.*]])
+; CHECK-NEXT: [[CALL2:%.*]] = call i8* @strcat(i8* [[P]], i8* [[Q]])
; CHECK-NEXT: ret void
;
%call1 = call i8* @strcat(i8* %P, i8* %Q)
More information about the llvm-commits
mailing list