[llvm] r314206 - [DSE] Merge stores when the later store only writes to memory locations the early store also wrote to (2nd try)

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 26 06:54:28 PDT 2017


Author: spatel
Date: Tue Sep 26 06:54:28 2017
New Revision: 314206

URL: http://llvm.org/viewvc/llvm-project?rev=314206&view=rev
Log:
[DSE] Merge stores when the later store only writes to memory locations the early store also wrote to (2nd try)

This is a 2nd attempt at:
https://reviews.llvm.org/rL310055
...which was reverted at rL310123 because of PR34074:
https://bugs.llvm.org/show_bug.cgi?id=34074

In this version, we break out of the inner loop after we successfully merge and kill a pair of stores. In the
earlier rev, we were continuing instead, which meant we could process the invalid info from a now dead store.

Original commit message (authored by Filipe Cabecinhas):

This fixes PR31777.

If both stores' values are ConstantInt, we merge the two stores
(shifting the smaller store appropriately) and replace the earlier (and
larger) store with an updated constant.

In the future we should also support vectors of integers, and maybe
float/double if we can.

Differential Revision: https://reviews.llvm.org/D30703

Added:
    llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
    llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll
    llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll

Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=314206&r1=314205&r2=314206&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Tue Sep 26 06:54:28 2017
@@ -34,6 +34,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -49,12 +50,18 @@ STATISTIC(NumRedundantStores, "Number of
 STATISTIC(NumFastStores, "Number of stores deleted");
 STATISTIC(NumFastOther , "Number of other instrs removed");
 STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
+STATISTIC(NumModifiedStores, "Number of stores modified");
 
 static cl::opt<bool>
 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
   cl::init(true), cl::Hidden,
   cl::desc("Enable partial-overwrite tracking in DSE"));
 
+static cl::opt<bool>
+EnablePartialStoreMerging("enable-dse-partial-store-merging",
+  cl::init(true), cl::Hidden,
+  cl::desc("Enable partial store merging in DSE"));
+
 
 //===----------------------------------------------------------------------===//
 // Helper functions
@@ -287,14 +294,22 @@ static uint64_t getPointerSize(const Val
 }
 
 namespace {
-enum OverwriteResult { OW_Begin, OW_Complete, OW_End, OW_Unknown };
+enum OverwriteResult {
+  OW_Begin,
+  OW_Complete,
+  OW_End,
+  OW_PartialEarlierWithFullLater,
+  OW_Unknown
+};
 }
 
 /// Return 'OW_Complete' if a store to the 'Later' location completely
 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later', or
-/// 'OW_Unknown' if nothing can be determined.
+/// beginning of the 'Earlier' location is overwritten by 'Later'.
+/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
+/// overwritten by a later (smaller) store which doesn't write outside the big
+/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
 static OverwriteResult isOverwrite(const MemoryLocation &Later,
                                    const MemoryLocation &Earlier,
                                    const DataLayout &DL,
@@ -427,6 +442,19 @@ static OverwriteResult isOverwrite(const
     }
   }
 
+  // Check for an earlier store which writes to all the memory locations that
+  // the later store writes to.
+  if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
+      int64_t(EarlierOff + Earlier.Size) > LaterOff &&
+      uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) {
+    DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" << EarlierOff
+                 << ", " << int64_t(EarlierOff + Earlier.Size)
+                 << ") by a later store [" << LaterOff << ", "
+                 << int64_t(LaterOff + Later.Size) << ")\n");
+    // TODO: Maybe come up with a better name?
+    return OW_PartialEarlierWithFullLater;
+  }
+
   // Another interesting case is if the later store overwrites the end of the
   // earlier store.
   //
@@ -1094,6 +1122,8 @@ static bool eliminateDeadStores(BasicBlo
       // If we find a write that is a) removable (i.e., non-volatile), b) is
       // completely obliterated by the store to 'Loc', and c) which we know that
       // 'Inst' doesn't load from, then we can remove it.
+      // Also try to merge two stores if a later one only touches memory written
+      // to by the earlier one.
       if (isRemovable(DepWrite) &&
           !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
         int64_t InstWriteOffset, DepWriteOffset;
@@ -1123,6 +1153,72 @@ static bool eliminateDeadStores(BasicBlo
           bool IsOverwriteEnd = (OR == OW_End);
           MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
                                     InstWriteOffset, LaterSize, IsOverwriteEnd);
+        } else if (EnablePartialStoreMerging &&
+                   OR == OW_PartialEarlierWithFullLater) {
+          auto *Earlier = dyn_cast<StoreInst>(DepWrite);
+          auto *Later = dyn_cast<StoreInst>(Inst);
+          if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+              Later && isa<ConstantInt>(Later->getValueOperand())) {
+            // If the store we find is:
+            //   a) partially overwritten by the store to 'Loc'
+            //   b) the later store is fully contained in the earlier one and
+            //   c) they both have a constant value
+            // Merge the two stores, replacing the earlier store's value with a
+            // merge of both values.
+            // TODO: Deal with other constant types (vectors, etc), and probably
+            // some mem intrinsics (if needed)
+
+            APInt EarlierValue =
+                cast<ConstantInt>(Earlier->getValueOperand())->getValue();
+            APInt LaterValue =
+                cast<ConstantInt>(Later->getValueOperand())->getValue();
+            unsigned LaterBits = LaterValue.getBitWidth();
+            assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
+            LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+
+            // Offset of the smaller store inside the larger store
+            unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
+            unsigned LShiftAmount =
+                DL.isBigEndian()
+                    ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits
+                    : BitOffsetDiff;
+            APInt Mask =
+                APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
+                                  LShiftAmount + LaterBits);
+            // Clear the bits we'll be replacing, then OR with the smaller
+            // store, shifted appropriately.
+            APInt Merged =
+                (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
+            DEBUG(dbgs() << "DSE: Merge Stores:\n  Earlier: " << *DepWrite
+                         << "\n  Later: " << *Inst
+                         << "\n  Merged Value: " << Merged << '\n');
+
+            auto *SI = new StoreInst(
+                ConstantInt::get(Earlier->getValueOperand()->getType(), Merged),
+                Earlier->getPointerOperand(), false, Earlier->getAlignment(),
+                Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite);
+
+            unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa,
+                                   LLVMContext::MD_alias_scope,
+                                   LLVMContext::MD_noalias,
+                                   LLVMContext::MD_nontemporal};
+            SI->copyMetadata(*DepWrite, MDToKeep);
+            ++NumModifiedStores;
+
+            // Remove earlier, wider, store
+            size_t Idx = InstrOrdering.lookup(DepWrite);
+            InstrOrdering.erase(DepWrite);
+            InstrOrdering.insert(std::make_pair(SI, Idx));
+
+            // Delete the old stores and now-dead instructions that feed them.
+            deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering);
+            deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
+                                  &InstrOrdering);
+            MadeChange = true;
+
+            // We erased DepWrite and Inst (Loc); start over.
+            break;
+          }
         }
       }
 

Modified: llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll?rev=314206&r1=314205&r2=314206&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll Tue Sep 26 06:54:28 2017
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+; RUN: opt < %s -basicaa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; Ensure that the dead store is deleted in this case.  It is wholely

Modified: llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll?rev=314206&r1=314205&r2=314206&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll Tue Sep 26 06:54:28 2017
@@ -1,4 +1,4 @@
-; RUN: opt -S -dse < %s | FileCheck %s
+; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-bgq-linux"
 

Added: llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll?rev=314206&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll Tue Sep 26 06:54:28 2017
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
+
+define void @byte_by_byte_replacement(i32 *%ptr) {
+; CHECK-LABEL: @byte_by_byte_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 151653132, i32* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  ;; This store's value should be modified as it should be better to use one
+  ;; larger store than several smaller ones.
+  ;; store will turn into 0x090A0B0C == 151653132
+  store i32 305419896, i32* %ptr  ; 0x12345678
+  %bptr = bitcast i32* %ptr to i8*
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  ;; We should be able to merge these four stores with the i32 above
+  ; value (and bytes) stored before  ; 0x12345678
+  store i8 9, i8* %bptr              ;   09
+  store i8 10, i8* %bptr1            ;     0A
+  store i8 11, i8* %bptr2            ;       0B
+  store i8 12, i8* %bptr3            ;         0C
+  ;                                    0x090A0B0C
+
+  ret void
+}
+
+define void @word_replacement(i64 *%ptr) {
+; CHECK-LABEL: @word_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 72638273700655232, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 72623859790382856, i64* %ptr  ; 0x0102030405060708
+
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
+
+  ;; We should be able to merge these two stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x0102030405060708
+  store i16  4128, i16* %wptr1       ;       1020
+  store i16 28800, i16* %wptr3       ;               7080
+  ;                                    0x0102102005067080
+
+  ret void
+}
+
+
+define void @differently_sized_replacements(i64 *%ptr) {
+; CHECK-LABEL: @differently_sized_replacements(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289077004501059343, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr6    ;               07
+  store i16     1541, i16* %wptr2    ;           0605
+  store i32 67305985, i32* %dptr     ;   04030201
+  ;                                    0x040302010605070f
+  ret void
+}
+
+
+define void @multiple_replacements_to_same_byte(i64 *%ptr) {
+; CHECK-LABEL: @multiple_replacements_to_same_byte(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289077004602248719, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr3    ;         07
+  store i16     1541, i16* %wptr1    ;       0605
+  store i32 67305985, i32* %dptr     ;   04030201
+  ;                                    0x040302010c0d0e0f
+  ret void
+}
+
+define void @merged_merges(i64 *%ptr) {
+; CHECK-LABEL: @merged_merges(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289081428418563599, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i32 67305985, i32* %dptr     ;   04030201
+  store i16     1541, i16* %wptr1    ;       0605
+  store i8         7, i8*  %bptr3    ;         07
+  ;                                    0x040306070c0d0e0f
+  ret void
+}
+
+define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
+; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
+; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
+; CHECK-NEXT:    [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
+; CHECK-NEXT:    [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
+; CHECK-NEXT:    store i32 1234, i32* [[DPTR]], align 1
+; CHECK-NEXT:    store i64 5678, i64* [[QPTR]], align 1
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+
+  store i64 0, i64* %ptr
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %dptr = bitcast i8* %bptrm1 to i32*
+  %qptr = bitcast i8* %bptr3 to i64*
+
+  store i32 1234, i32* %dptr, align 1
+  store i64 5678, i64* %qptr, align 1
+
+  ret i8 0
+}
+
+;; Test case from PR31777
+%union.U = type { i64 }
+
+define void @foo(%union.U* nocapture %u) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i64 11821949021847552, i64* [[I]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
+  store i64 0, i64* %i, align 8
+  %s = bitcast %union.U* %u to i16*
+  store i16 42, i16* %s, align 8
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll?rev=314206&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll Tue Sep 26 06:54:28 2017
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+
+define void @byte_by_byte_replacement(i32 *%ptr) {
+; CHECK-LABEL: @byte_by_byte_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 202050057, i32* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  ;; This store's value should be modified as it should be better to use one
+  ;; larger store than several smaller ones.
+  ;; store will turn into 0x0C0B0A09 == 202050057
+  store i32 305419896, i32* %ptr  ; 0x12345678
+  %bptr = bitcast i32* %ptr to i8*
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  ;; We should be able to merge these four stores with the i32 above
+  ; value (not bytes) stored before  ; 0x12345678
+  store i8 9, i8* %bptr              ;         09
+  store i8 10, i8* %bptr1            ;       0A
+  store i8 11, i8* %bptr2            ;     0B
+  store i8 12, i8* %bptr3            ;   0C
+  ;                                    0x0C0B0A09
+  ret void
+}
+
+define void @word_replacement(i64 *%ptr) {
+; CHECK-LABEL: @word_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 8106482645252179720, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 72623859790382856, i64* %ptr  ; 0x0102030405060708
+
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
+
+  ;; We should be able to merge these two stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x0102030405060708
+  store i16  4128, i16* %wptr1       ;           1020
+  store i16 28800, i16* %wptr3       ;   7080
+  ;                                    0x7080030410200708
+  ret void
+}
+
+
+define void @differently_sized_replacements(i64 *%ptr) {
+; CHECK-LABEL: @differently_sized_replacements(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 578437695752307201, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr6    ;     07
+  store i16     1541, i16* %wptr2    ;       0605
+  store i32 67305985, i32* %dptr     ;           04030201
+  ;                                    0x0807060504030201
+  ret void
+}
+
+
+define void @multiple_replacements_to_same_byte(i64 *%ptr) {
+; CHECK-LABEL: @multiple_replacements_to_same_byte(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 579005069522043393, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr3    ;           07
+  store i16     1541, i16* %wptr1    ;           0605
+  store i32 67305985, i32* %dptr     ;           04030201
+  ;                                    0x08090a0b04030201
+  ret void
+}
+
+define void @merged_merges(i64 *%ptr) {
+; CHECK-LABEL: @merged_merges(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 579005069572506113, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i32 67305985, i32* %dptr     ;           04030201
+  store i16     1541, i16* %wptr1    ;           0605
+  store i8         7, i8*  %bptr3    ;           07
+  ;                                    0x08090a0b07050201
+  ret void
+}
+
+define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
+; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
+; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
+; CHECK-NEXT:    [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
+; CHECK-NEXT:    [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
+; CHECK-NEXT:    store i32 1234, i32* [[DPTR]], align 1
+; CHECK-NEXT:    store i64 5678, i64* [[QPTR]], align 1
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+
+  ; Also check that alias.scope metadata doesn't get dropped
+  store i64 0, i64* %ptr, !alias.scope !32
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %dptr = bitcast i8* %bptrm1 to i32*
+  %qptr = bitcast i8* %bptr3 to i64*
+
+  store i32 1234, i32* %dptr, align 1
+  store i64 5678, i64* %qptr, align 1
+
+  ret i8 0
+}
+
+;; Test case from PR31777
+%union.U = type { i64 }
+
+define void @foo(%union.U* nocapture %u) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i64 42, i64* [[I]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
+  store i64 0, i64* %i, align 8, !dbg !22, !tbaa !26, !noalias !30, !nontemporal !29
+  %s = bitcast %union.U* %u to i16*
+  store i16 42, i16* %s, align 8
+  ret void
+}
+
+; Don't crash by operating on stale data if we merge (kill) the last 2 stores.
+
+define void @PR34074(i32* %x, i64* %y) {
+; CHECK-LABEL: @PR34074(
+; CHECK-NEXT:    store i64 42, i64* %y
+; CHECK-NEXT:    store i32 4, i32* %x
+; CHECK-NEXT:    ret void
+;
+  store i64 42, i64* %y          ; independent store
+  %xbc = bitcast i32* %x to i8*
+  store i32 0, i32* %x           ; big store of constant
+  store i8 4, i8* %xbc           ; small store with mergeable constant
+  ret void
+}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 306512)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "me.cpp", directory: "/compiler-explorer")
+!2 = !{}
+!7 = distinct !DISubprogram(name: "foo", linkageName: "foo(U*)", scope: !1, file: !1, line: 9, type: !8, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "U", file: !1, line: 4, size: 64, elements: !12, identifier: "typeinfo name for U")
+!12 = !{!13, !17}
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !11, file: !1, line: 5, baseType: !14, size: 64)
+!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", file: !15, line: 55, baseType: !16)
+!15 = !DIFile(filename: "/usr/include/stdint.h", directory: "/compiler-explorer")
+!16 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "s", scope: !11, file: !1, line: 6, baseType: !18, size: 16)
+!18 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", file: !15, line: 49, baseType: !19)
+!19 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
+!20 = !{!21}
+!21 = !DILocalVariable(name: "u", arg: 1, scope: !7, file: !1, line: 9, type: !10)
+!22 = !DILocation(line: 10, column: 8, scope: !7)
+
+!26 = !{!27, !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C++ TBAA"}
+
+!29 = !{i32 1}
+
+; Domains and scopes which might alias
+!30 = !{!30}
+!31 = !{!31, !30}
+
+!32 = !{!32}
+!33 = !{!33, !32}




More information about the llvm-commits mailing list