[llvm] 120c059 - [DSE,MSSA] Port partial store merging.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 15 10:42:39 PDT 2020


Author: Florian Hahn
Date: 2020-06-15T18:41:46+01:00
New Revision: 120c0592929e77596fb0598b31ebbf98fea1deb3

URL: https://github.com/llvm/llvm-project/commit/120c0592929e77596fb0598b31ebbf98fea1deb3
DIFF: https://github.com/llvm/llvm-project/commit/120c0592929e77596fb0598b31ebbf98fea1deb3.diff

LOG: [DSE,MSSA] Port partial store merging.

Port partial constant store merging logic to MemorySSA-backed DSE. The
heavy lifting is done by the existing helper function. It is used in a
context where we have already ensured that the later instruction can
eliminate the earlier one if it is a complete overwrite.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 4c5492eef37c..c891487b42d6 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2064,6 +2064,28 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
       OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset,
                                        InstWriteOffset, NI, IOL, AA, &F);
 
+      if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
+        auto *Earlier = dyn_cast<StoreInst>(NI);
+        auto *Later = dyn_cast<StoreInst>(SI);
+        if (Constant *Merged = tryToMergePartialOverlappingStores(
+                Earlier, Later, InstWriteOffset, DepWriteOffset, DL, &AA,
+                &DT)) {
+
+          // Update stored value of earlier store to merged constant.
+          Earlier->setOperand(0, Merged);
+          ++NumModifiedStores;
+          MadeChange = true;
+
+          // Remove later store and remove any outstanding overlap intervals for
+          // the updated store.
+          State.deleteDeadInstruction(Later);
+          auto I = State.IOLs.find(Earlier->getParent());
+          if (I != State.IOLs.end())
+            I->second.erase(Earlier);
+          break;
+        }
+      }
+
       ToCheck.insert(NextDef->getDefiningAccess());
       if (OR == OW_Complete) {
         LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *NI

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
index 055f117c4b5c..8acc29f3f62e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; XFAIL: *
 ; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
 

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
index 9cd3a7cec013..4ea5f08df41a 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
@@ -6,15 +6,15 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @test4(i32* noalias %P) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]], align 4
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
   store i32 0, i32* %P
@@ -37,7 +37,7 @@ define void @test5(i32* noalias %P) {
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   br i1 true, label %bb1, label %bb2
@@ -58,10 +58,10 @@ define void @test8(i32* %P, i32* %Q) {
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    store i32 1, i32* [[Q:%.*]]
+; CHECK-NEXT:    store i32 1, i32* [[Q:%.*]], align 4
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   br i1 true, label %bb1, label %bb2
@@ -78,15 +78,13 @@ bb3:
 
 define void @test10(i32* noalias %P) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    store i8 1, i8* [[P2]]
 ; CHECK-NEXT:    ret void
 ;
   %P2 = bitcast i32* %P to i8*

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
index 81184fee69a9..f3e019be7882 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
@@ -5,15 +5,13 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @second_store_smaller_1(i32* noalias %P, i1 %c) {
 ; CHECK-LABEL: @second_store_smaller_1(
-; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[P_I16:%.*]] = bitcast i32* [[P]] to i16*
-; CHECK-NEXT:    store i16 0, i16* [[P_I16]], align 2
 ; CHECK-NEXT:    ret void
 ;
   store i32 1, i32* %P
@@ -30,15 +28,13 @@ bb3:
 
 define void @second_store_smaller_2(i32* noalias %P, i1 %c) {
 ; CHECK-LABEL: @second_store_smaller_2(
-; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    store i32 12345, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[P_I16:%.*]] = bitcast i32* [[P]] to i16*
-; CHECK-NEXT:    store i16 12345, i16* [[P_I16]], align 2
 ; CHECK-NEXT:    ret void
 ;
   store i32 1, i32* %P

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
index 70c055e3b370..82228fa590c9 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
@@ -10,23 +10,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) n
 declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 declare void @llvm.init.trampoline(i8*, i8*, i8*)
 
-; Do not delete stores that are only partially killed.
-define i32 @test8() {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[V:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1234567, i32* [[V]]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[V]]
-; CHECK-NEXT:    ret i32 [[X]]
-;
-  %V = alloca i32
-  store i32 1234567, i32* %V
-  %V2 = bitcast i32* %V to i8*
-  store i8 0, i8* %V2
-  %X = load i32, i32* %V
-  ret i32 %X
-
-}
-
 ; Test for byval handling.
 %struct.x = type { i32, i32, i32, i32 }
 define void @test9(%struct.x* byval  %a) nounwind  {
@@ -135,18 +118,3 @@ bb1:
 bb2:
   ret i32 0
 }
-
-define void @test43a(i32* %P, i32* noalias %Q) {
-; CHECK-LABEL: @test43a(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4
-; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
-; CHECK-NEXT:    ret void
-;
-entry:
-  store atomic i32 1, i32* %P unordered, align 4
-  %P2 = bitcast i32* %P to i8*
-  store atomic i32 2, i32* %Q unordered, align 4
-  store atomic i8 3, i8* %P2 unordered, align 4
-  ret void
-}

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
index 02b065ce627c..6eff9b8cd9a1 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
@@ -119,11 +119,27 @@ define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %
   ret void
 }
 
+; Do not delete stores that are only partially killed.
+define i32 @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 1234567, i32* [[V]], align 4
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[V]], align 4
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %V = alloca i32
+  store i32 1234567, i32* %V
+  %V2 = bitcast i32* %V to i8*
+  store i8 0, i8* %V2
+  %X = load i32, i32* %V
+  ret i32 %X
+
+}
 
 ; va_arg has fuzzy dependence, the store shouldn't be zapped.
 define double @test10(i8* %X) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i8*
+; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i8*, align 8
 ; CHECK-NEXT:    store i8* [[X:%.*]], i8** [[X_ADDR]], align 8
 ; CHECK-NEXT:    [[TMP_0:%.*]] = va_arg i8** [[X_ADDR]], double
 ; CHECK-NEXT:    ret double [[TMP_0]]
@@ -579,3 +595,18 @@ define void @test42a(i32* %P, i32* %Q) {
   store atomic i8 3, i8* %P2 unordered, align 4
   ret void
 }
+
+define void @test43a(i32* %P, i32* noalias %Q) {
+; CHECK-LABEL: @test43a(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  store atomic i32 1, i32* %P unordered, align 4
+  %P2 = bitcast i32* %P to i8*
+  store atomic i32 2, i32* %Q unordered, align 4
+  store atomic i8 3, i8* %P2 unordered, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list