[llvm] 065bf12 - [DSE] Remove noop stores in MSSA.
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 30 09:57:55 PDT 2020
Author: zoecarver
Date: 2020-05-30T09:57:30-07:00
New Revision: 065bf124fde8fd9d37eba0a387833cdfe9e68d08
URL: https://github.com/llvm/llvm-project/commit/065bf124fde8fd9d37eba0a387833cdfe9e68d08
DIFF: https://github.com/llvm/llvm-project/commit/065bf124fde8fd9d37eba0a387833cdfe9e68d08.diff
LOG: [DSE] Remove noop stores in MSSA.
Adds a simple fast-path check for the pattern:
v = load ptr
store v to ptr
I took the tests from the bugzilla post, I can add more if needed (but I think these should be sufficent).
Refs: https://bugs.llvm.org/show_bug.cgi?id=45795
Differential Revision: https://reviews.llvm.org/D79391
Added:
llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll
Modified:
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c9be31c19442..36d8b4b06cf4 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -82,6 +82,7 @@ STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther, "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
STATISTIC(NumModifiedStores, "Number of stores modified");
+STATISTIC(NumNoopStores, "Number of noop stores deleted");
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
"Controls which MemoryDefs are eliminated.");
@@ -1821,6 +1822,21 @@ struct DSEState {
}
};
+/// \returns true if \p KillingDef stores the result of \p Load to the source of
+/// \p Load.
+static bool storeIsNoop(MemorySSA &MSSA, LoadInst *Load,
+ MemoryDef *KillingDef) {
+ Instruction *Store = KillingDef->getMemoryInst();
+ // If the load's operand isn't the destination of the store, bail.
+ if (Load->getPointerOperand() != Store->getOperand(1))
+ return false;
+
+ // Get the defining access for the load.
+ auto *LoadAccess = MSSA.getMemoryAccess(Load)->getDefiningAccess();
+ // The store is dead if the defining accesses are the same.
+ return LoadAccess == KillingDef->getDefiningAccess();
+}
+
bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT,
@@ -1835,6 +1851,18 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
if (State.SkipStores.count(KillingDef))
continue;
Instruction *SI = KillingDef->getMemoryInst();
+
+ // Check if we're storing a value that we just loaded.
+ if (auto *Load = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (storeIsNoop(MSSA, Load, KillingDef)) {
+ State.deleteDeadInstruction(SI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *SI
+ << '\n');
+ NumNoopStores++;
+ continue;
+ }
+ }
+
auto MaybeSILoc = State.getLocForWriteEx(SI);
if (!MaybeSILoc) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
index 26df903bb275..8a4515388f2d 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
@@ -98,3 +98,37 @@ define i32 @test15() {
store i32 1, i32* @x
ret i32 %x
}
+
+; **** Noop load->store tests **************************************************
+
+; We can optimize unordered atomic loads or stores.
+define void @test_load_atomic(i32* %Q) {
+; CHECK-LABEL: @test_load_atomic(
+; CHECK-NEXT: ret void
+;
+ %a = load atomic i32, i32* %Q unordered, align 4
+ store atomic i32 %a, i32* %Q unordered, align 4
+ ret void
+}
+
+; We can optimize unordered atomic loads or stores.
+define void @test_store_atomic(i32* %Q) {
+; CHECK-LABEL: @test_store_atomic(
+; CHECK-NEXT: ret void
+;
+ %a = load i32, i32* %Q
+ store atomic i32 %a, i32* %Q unordered, align 4
+ ret void
+}
+
+; We can NOT optimize release atomic loads or stores.
+define void @test_store_atomic_release(i32* %Q) {
+; CHECK-LABEL: @test_store_atomic_release(
+; CHECK-NEXT: load
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: ret void
+;
+ %a = load i32, i32* %Q
+ store atomic i32 %a, i32* %Q release, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll
new file mode 100644
index 000000000000..11eaf911392f
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll
@@ -0,0 +1,171 @@
+; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.init.trampoline(i8*, i8*, i8*)
+
+; **** Noop load->store tests **************************************************
+
+; We CAN optimize volatile loads.
+define void @test_load_volatile(i32* %Q) {
+; CHECK-LABEL: @test_load_volatile(
+; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]]
+; CHECK-NEXT: store i32 [[A]], i32* [[Q]]
+; CHECK-NEXT: ret void
+;
+ %a = load volatile i32, i32* %Q
+ store i32 %a, i32* %Q
+ ret void
+}
+
+; We can NOT optimize volatile stores.
+define void @test_store_volatile(i32* %Q) {
+; CHECK-LABEL: @test_store_volatile(
+; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[Q:%.*]]
+; CHECK-NEXT: store volatile i32 [[A]]
+; CHECK-NEXT: ret void
+;
+ %a = load i32, i32* %Q
+ store volatile i32 %a, i32* %Q
+ ret void
+}
+
+; PR2599 - load -> store to same address.
+define void @test12({ i32, i32 }* %x) nounwind {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]]
+; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4
+; CHECK-NEXT: ret void
+;
+ %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0
+ %tmp5 = load i32, i32* %tmp4, align 4
+ %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
+ %tmp8 = load i32, i32* %tmp7, align 4
+ %tmp17 = sub i32 0, %tmp8
+ store i32 %tmp5, i32* %tmp4, align 4
+ store i32 %tmp17, i32* %tmp7, align 4
+ ret void
+}
+
+; Remove redundant store if loaded value is in another block.
+define i32 @test26(i1 %c, i32* %p) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ store i32 %v, i32* %p, align 4
+ br label %bb3
+bb3:
+ ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block.
+define i32 @test27(i1 %c, i32* %p) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+declare void @unknown_func()
+
+; Remove redundant store, which is in the lame loop as the load.
+define i32 @test33(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: call void @unknown_func()
+; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %bb1
+bb1:
+ %v = load i32, i32* %p, align 4
+ br label %bb2
+bb2:
+ store i32 %v, i32* %p, align 4
+ ; Might read and overwrite value at %p, but doesn't matter.
+ call void @unknown_func()
+ br i1 undef, label %bb1, label %bb3
+bb3:
+ ret i32 0
+}
+
+declare void @unkown_write(i32*)
+
+; We can't remove the "noop" store around an unkown write.
+define void @test43(i32* %Q) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: load
+; CHECK-NEXT: call
+; CHECK-NEXT: store
+; CHECK-NEXT: ret void
+;
+ %a = load i32, i32* %Q
+ call void @unkown_write(i32* %Q)
+ store i32 %a, i32* %Q
+ ret void
+}
+
+; We CAN remove it when the unkown write comes AFTER.
+define void @test44(i32* %Q) {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: call
+; CHECK-NEXT: ret void
+ %a = load i32, i32* %Q
+ store i32 %a, i32* %Q
+ call void @unkown_write(i32* %Q)
+ ret void
+}
+
+define void @test45(i32* %Q) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[A:%.*]] = load
+; CHECK-NEXT: store i32 [[A]]
+; CHECK-NEXT: ret void
+ %a = load i32, i32* %Q
+ store i32 10, i32* %Q
+ store i32 %a, i32* %Q
+ ret void
+}
+
+
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
index 7e212187f030..70c055e3b370 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
@@ -10,16 +10,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) n
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)
-define void @test5(i32* %Q) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]]
-; CHECK-NEXT: ret void
-;
- %a = load volatile i32, i32* %Q
- store i32 %a, i32* %Q
- ret void
-}
-
; Do not delete stores that are only partially killed.
define i32 @test8() {
; CHECK-LABEL: @test8(
@@ -80,25 +70,6 @@ define void @test11() {
ret void
}
-; PR2599 - load -> store to same address.
-define void @test12({ i32, i32 }* %x) nounwind {
-; CHECK-LABEL: @test12(
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
-; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]]
-; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4
-; CHECK-NEXT: ret void
-;
- %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0
- %tmp5 = load i32, i32* %tmp4, align 4
- %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
- %tmp8 = load i32, i32* %tmp7, align 4
- %tmp17 = sub i32 0, %tmp8
- store i32 %tmp5, i32* %tmp4, align 4
- store i32 %tmp17, i32* %tmp7, align 4
- ret void
-}
-
declare noalias i8* @malloc(i32)
declare noalias i8* @calloc(i32, i32)
@@ -143,54 +114,6 @@ define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
ret void
}
-; Remove redundant store if loaded value is in another block.
-define i32 @test26(i1 %c, i32* %p) {
-; CHECK-LABEL: @test26(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[BB3:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: br label [[BB3]]
-; CHECK: bb3:
-; CHECK-NEXT: ret i32 0
-;
-entry:
- %v = load i32, i32* %p, align 4
- br i1 %c, label %bb1, label %bb2
-bb1:
- br label %bb3
-bb2:
- store i32 %v, i32* %p, align 4
- br label %bb3
-bb3:
- ret i32 0
-}
-
-; Remove redundant store if loaded value is in another block.
-define i32 @test27(i1 %c, i32* %p) {
-; CHECK-LABEL: @test27(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[BB3:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: br label [[BB3]]
-; CHECK: bb3:
-; CHECK-NEXT: ret i32 0
-;
-entry:
- %v = load i32, i32* %p, align 4
- br i1 %c, label %bb1, label %bb2
-bb1:
- br label %bb3
-bb2:
- br label %bb3
-bb3:
- store i32 %v, i32* %p, align 4
- ret i32 0
-}
-
declare void @unknown_func()
; Remove redundant store if loaded value is in another block inside a loop.
@@ -213,48 +136,6 @@ bb2:
ret i32 0
}
-; Remove redundant store, which is in the lame loop as the load.
-define i32 @test33(i1 %c, i32* %p, i32 %i) {
-; CHECK-LABEL: @test33(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[BB2:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: call void @unknown_func()
-; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]]
-; CHECK: bb3:
-; CHECK-NEXT: ret i32 0
-;
-entry:
- br label %bb1
-bb1:
- %v = load i32, i32* %p, align 4
- br label %bb2
-bb2:
- store i32 %v, i32* %p, align 4
- ; Might read and overwrite value at %p, but doesn't matter.
- call void @unknown_func()
- br i1 undef, label %bb1, label %bb3
-bb3:
- ret i32 0
-}
-
-define void @test43(i32* %P, i32* noalias %Q) {
-; CHECK-LABEL: @test43(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: store i32 50331649, i32* [[P:%.*]]
-; CHECK-NEXT: store i32 2, i32* [[Q:%.*]]
-; CHECK-NEXT: ret void
-;
-entry:
- store i32 1, i32* %P
- %P2 = bitcast i32* %P to i8*
- store i32 2, i32* %Q
- store i8 3, i8* %P2
- ret void
-}
-
define void @test43a(i32* %P, i32* noalias %Q) {
; CHECK-LABEL: @test43a(
; CHECK-NEXT: entry:
More information about the llvm-commits
mailing list