[llvm] r310055 - [DSE] Merge stores when the later store only writes to memory locations the early store also wrote to.

Filipe Cabecinhas via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 4 05:28:36 PDT 2017


Author: filcab
Date: Fri Aug  4 05:28:36 2017
New Revision: 310055

URL: http://llvm.org/viewvc/llvm-project?rev=310055&view=rev
Log:
[DSE] Merge stores when the later store only writes to memory locations the early store also wrote to.

Summary:
This fixes PR31777.

If both stores' values are ConstantInt, we merge the two stores
(shifting the smaller store appropriately) and replace the earlier (and
larger) store with an updated constant.

In the future we should also support vectors of integers. And maybe
float/double if we can.

Reviewers: hfinkel, junbuml, jfb, RKSimon, bkramer

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D30703

Added:
    llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
    llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll
    llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll

Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=310055&r1=310054&r2=310055&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Fri Aug  4 05:28:36 2017
@@ -34,6 +34,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -49,12 +50,18 @@ STATISTIC(NumRedundantStores, "Number of
 STATISTIC(NumFastStores, "Number of stores deleted");
 STATISTIC(NumFastOther , "Number of other instrs removed");
 STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
+STATISTIC(NumModifiedStores, "Number of stores modified");
 
 static cl::opt<bool>
 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
   cl::init(true), cl::Hidden,
   cl::desc("Enable partial-overwrite tracking in DSE"));
 
+static cl::opt<bool>
+EnablePartialStoreMerging("enable-dse-partial-store-merging",
+  cl::init(true), cl::Hidden,
+  cl::desc("Enable partial store merging in DSE"));
+
 
 //===----------------------------------------------------------------------===//
 // Helper functions
@@ -287,14 +294,22 @@ static uint64_t getPointerSize(const Val
 }
 
 namespace {
-enum OverwriteResult { OW_Begin, OW_Complete, OW_End, OW_Unknown };
+enum OverwriteResult {
+  OW_Begin,
+  OW_Complete,
+  OW_End,
+  OW_PartialEarlierWithFullLater,
+  OW_Unknown
+};
 }
 
 /// Return 'OW_Complete' if a store to the 'Later' location completely
 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later', or
-/// 'OW_Unknown' if nothing can be determined.
+/// beginning of the 'Earlier' location is overwritten by 'Later'.
+/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
+/// overwritten by a later (smaller) store which doesn't write outside the big
+/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
 static OverwriteResult isOverwrite(const MemoryLocation &Later,
                                    const MemoryLocation &Earlier,
                                    const DataLayout &DL,
@@ -427,6 +442,19 @@ static OverwriteResult isOverwrite(const
     }
   }
 
+  // Check for an earlier store which writes to all the memory locations that
+  // the later store writes to.
+  if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
+      int64_t(EarlierOff + Earlier.Size) > LaterOff &&
+      uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) {
+    DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" << EarlierOff
+                 << ", " << int64_t(EarlierOff + Earlier.Size)
+                 << ") by a later store [" << LaterOff << ", "
+                 << int64_t(LaterOff + Later.Size) << ")\n");
+    // TODO: Maybe come up with a better name?
+    return OW_PartialEarlierWithFullLater;
+  }
+
   // Another interesting case is if the later store overwrites the end of the
   // earlier store.
   //
@@ -1094,6 +1122,8 @@ static bool eliminateDeadStores(BasicBlo
       // If we find a write that is a) removable (i.e., non-volatile), b) is
       // completely obliterated by the store to 'Loc', and c) which we know that
       // 'Inst' doesn't load from, then we can remove it.
+      // Also try to merge two stores if a later one only touches memory
+      // written to by the earlier one.
       if (isRemovable(DepWrite) &&
           !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
         int64_t InstWriteOffset, DepWriteOffset;
@@ -1123,6 +1153,73 @@ static bool eliminateDeadStores(BasicBlo
           bool IsOverwriteEnd = (OR == OW_End);
           MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
                                     InstWriteOffset, LaterSize, IsOverwriteEnd);
+        } else if (EnablePartialStoreMerging &&
+                   OR == OW_PartialEarlierWithFullLater) {
+          auto *Earlier = dyn_cast<StoreInst>(DepWrite);
+          auto *Later = dyn_cast<StoreInst>(Inst);
+          if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+              Later && isa<ConstantInt>(Later->getValueOperand())) {
+            // If the store we find is:
+            //   a) partially overwritten by the store to 'Loc'
+            //   b) the latter store is fully contained in the earlier one and
+            //   c) They both have a constant value
+            // Merge the two stores, replacing the earlier store's value with a
+            // merge of both values.
+            // TODO: Deal with other constant types (vectors, etc), and probably
+            // some mem intrinsics (if needed)
+
+            APInt EarlierValue =
+                cast<ConstantInt>(Earlier->getValueOperand())->getValue();
+            APInt LaterValue =
+                cast<ConstantInt>(Later->getValueOperand())->getValue();
+            unsigned LaterBits = LaterValue.getBitWidth();
+            assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
+            LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+
+            // Offset of the smaller store inside the larger store
+            unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
+            unsigned LShiftAmount =
+                DL.isBigEndian()
+                    ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits
+                    : BitOffsetDiff;
+            APInt Mask =
+                APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
+                                  LShiftAmount + LaterBits);
+            // Clear the bits we'll be replacing, then OR with the smaller
+            // store, shifted appropriately.
+            APInt Merged =
+                (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
+            DEBUG(dbgs() << "DSE: Merge Stores:\n  Earlier: " << *DepWrite
+                         << "\n  Later: " << *Inst
+                         << "\n  Merged Value: " << Merged << '\n');
+
+            auto *SI = new StoreInst(
+                ConstantInt::get(Earlier->getValueOperand()->getType(), Merged),
+                Earlier->getPointerOperand(), false, Earlier->getAlignment(),
+                Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite);
+
+            unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa,
+                                   LLVMContext::MD_alias_scope,
+                                   LLVMContext::MD_noalias,
+                                   LLVMContext::MD_nontemporal};
+            SI->copyMetadata(*DepWrite, MDToKeep);
+            ++NumModifiedStores;
+
+            // Remove earlier, wider, store
+            size_t Idx = InstrOrdering.lookup(DepWrite);
+            InstrOrdering.erase(DepWrite);
+            InstrOrdering.insert(std::make_pair(SI, Idx));
+
+            // Delete the old stores and now-dead instructions that feed them.
+            deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering);
+            deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
+                                  &InstrOrdering);
+            MadeChange = true;
+
+            // We erased DepWrite and Inst (Loc); start over.
+            InstDep = MD->getDependency(SI);
+            continue;
+          }
         }
       }
 

Modified: llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll?rev=310055&r1=310054&r2=310055&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll Fri Aug  4 05:28:36 2017
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+; RUN: opt < %s -basicaa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; Ensure that the dead store is deleted in this case.  It is wholely

Modified: llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll?rev=310055&r1=310054&r2=310055&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll Fri Aug  4 05:28:36 2017
@@ -1,4 +1,4 @@
-; RUN: opt -S -dse < %s | FileCheck %s
+; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-bgq-linux"
 

Added: llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll?rev=310055&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll Fri Aug  4 05:28:36 2017
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
+
+define void @byte_by_byte_replacement(i32 *%ptr) {
+; CHECK-LABEL: @byte_by_byte_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 151653132, i32* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  ;; This store's value should be modified as it should be better to use one
+  ;; larger store than several smaller ones.
+  ;; store will turn into 0x090A0B0C == 151653132
+  store i32 305419896, i32* %ptr  ; 0x12345678
+  %bptr = bitcast i32* %ptr to i8*
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  ;; We should be able to merge these four stores with the i32 above
+  ; value (and bytes) stored before  ; 0x12345678
+  store i8 9, i8* %bptr              ;   09
+  store i8 10, i8* %bptr1            ;     0A
+  store i8 11, i8* %bptr2            ;       0B
+  store i8 12, i8* %bptr3            ;         0C
+  ;                                    0x090A0B0C
+
+  ret void
+}
+
+define void @word_replacement(i64 *%ptr) {
+; CHECK-LABEL: @word_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 72638273700655232, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 72623859790382856, i64* %ptr  ; 0x0102030405060708
+
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
+
+  ;; We should be able to merge these two stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x0102030405060708
+  store i16  4128, i16* %wptr1       ;       1020
+  store i16 28800, i16* %wptr3       ;               7080
+  ;                                    0x0102102005067080
+
+  ret void
+}
+
+
+define void @differently_sized_replacements(i64 *%ptr) {
+; CHECK-LABEL: @differently_sized_replacements(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289077004501059343, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr6    ;               07
+  store i16     1541, i16* %wptr2    ;           0605
+  store i32 67305985, i32* %dptr     ;   04030201
+  ;                                    0x040302010605070f
+  ret void
+}
+
+
+define void @multiple_replacements_to_same_byte(i64 *%ptr) {
+; CHECK-LABEL: @multiple_replacements_to_same_byte(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289077004602248719, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr3    ;         07
+  store i16     1541, i16* %wptr1    ;       0605
+  store i32 67305985, i32* %dptr     ;   04030201
+  ;                                    0x040302010c0d0e0f
+  ret void
+}
+
+define void @merged_merges(i64 *%ptr) {
+; CHECK-LABEL: @merged_merges(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289081428418563599, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i32 67305985, i32* %dptr     ;   04030201
+  store i16     1541, i16* %wptr1    ;       0605
+  store i8         7, i8*  %bptr3    ;         07
+  ;                                    0x040306070c0d0e0f
+  ret void
+}
+
+define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
+; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
+; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
+; CHECK-NEXT:    [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
+; CHECK-NEXT:    [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
+; CHECK-NEXT:    store i32 1234, i32* [[DPTR]], align 1
+; CHECK-NEXT:    store i64 5678, i64* [[QPTR]], align 1
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+
+  store i64 0, i64* %ptr
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %dptr = bitcast i8* %bptrm1 to i32*
+  %qptr = bitcast i8* %bptr3 to i64*
+
+  store i32 1234, i32* %dptr, align 1
+  store i64 5678, i64* %qptr, align 1
+
+  ret i8 0
+}
+
+;; Test case from PR31777
+%union.U = type { i64 }
+
+define void @foo(%union.U* nocapture %u) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i64 11821949021847552, i64* [[I]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
+  store i64 0, i64* %i, align 8
+  %s = bitcast %union.U* %u to i16*
+  store i16 42, i16* %s, align 8
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll?rev=310055&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll Fri Aug  4 05:28:36 2017
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+
+define void @byte_by_byte_replacement(i32 *%ptr) {
+; CHECK-LABEL: @byte_by_byte_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 202050057, i32* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  ;; This store's value should be modified as it should be better to use one
+  ;; larger store than several smaller ones.
+  ;; store will turn into 0x0C0B0A09 == 202050057
+  store i32 305419896, i32* %ptr  ; 0x12345678
+  %bptr = bitcast i32* %ptr to i8*
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  ;; We should be able to merge these four stores with the i32 above
+  ; value (and bytes) stored before  ; 0x12345678
+  store i8 9, i8* %bptr              ;         09
+  store i8 10, i8* %bptr1            ;       0A
+  store i8 11, i8* %bptr2            ;     0B
+  store i8 12, i8* %bptr3            ;   0C
+  ;                                    0x0C0B0A09
+  ret void
+}
+
+define void @word_replacement(i64 *%ptr) {
+; CHECK-LABEL: @word_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 8106482645252179720, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 72623859790382856, i64* %ptr  ; 0x0102030405060708
+
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
+
+  ;; We should be able to merge these two stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x0102030405060708
+  store i16  4128, i16* %wptr1       ;           1020
+  store i16 28800, i16* %wptr3       ;   7080
+  ;                                    0x7080030410200708
+  ret void
+}
+
+
+define void @differently_sized_replacements(i64 *%ptr) {
+; CHECK-LABEL: @differently_sized_replacements(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 578437695752307201, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr6    ;     07
+  store i16     1541, i16* %wptr2    ;       0605
+  store i32 67305985, i32* %dptr     ;           04030201
+  ;                                    0x0807060504030201
+  ret void
+}
+
+
+define void @multiple_replacements_to_same_byte(i64 *%ptr) {
+; CHECK-LABEL: @multiple_replacements_to_same_byte(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 579005069522043393, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr3    ;           07
+  store i16     1541, i16* %wptr1    ;           0605
+  store i32 67305985, i32* %dptr     ;           04030201
+  ;                                    0x08090a0b04030201
+  ret void
+}
+
+define void @merged_merges(i64 *%ptr) {
+; CHECK-LABEL: @merged_merges(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 579005069572506113, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i32 67305985, i32* %dptr     ;           04030201
+  store i16     1541, i16* %wptr1    ;           0605
+  store i8         7, i8*  %bptr3    ;           07
+  ;                                    0x08090a0b07050201
+  ret void
+}
+
+define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
+; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
+; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
+; CHECK-NEXT:    [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
+; CHECK-NEXT:    [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
+; CHECK-NEXT:    store i32 1234, i32* [[DPTR]], align 1
+; CHECK-NEXT:    store i64 5678, i64* [[QPTR]], align 1
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+
+  ; Also check that alias.scope metadata doesn't get dropped
+  store i64 0, i64* %ptr, !alias.scope !32
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %dptr = bitcast i8* %bptrm1 to i32*
+  %qptr = bitcast i8* %bptr3 to i64*
+
+  store i32 1234, i32* %dptr, align 1
+  store i64 5678, i64* %qptr, align 1
+
+  ret i8 0
+}
+
+;; Test case from PR31777
+%union.U = type { i64 }
+
+define void @foo(%union.U* nocapture %u) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i64 42, i64* [[I]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
+  store i64 0, i64* %i, align 8, !dbg !22, !tbaa !26, !noalias !30, !nontemporal !29
+  %s = bitcast %union.U* %u to i16*
+  store i16 42, i16* %s, align 8
+  ret void
+}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 306512)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "me.cpp", directory: "/compiler-explorer")
+!2 = !{}
+!7 = distinct !DISubprogram(name: "foo", linkageName: "foo(U*)", scope: !1, file: !1, line: 9, type: !8, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "U", file: !1, line: 4, size: 64, elements: !12, identifier: "typeinfo name for U")
+!12 = !{!13, !17}
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !11, file: !1, line: 5, baseType: !14, size: 64)
+!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", file: !15, line: 55, baseType: !16)
+!15 = !DIFile(filename: "/usr/include/stdint.h", directory: "/compiler-explorer")
+!16 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "s", scope: !11, file: !1, line: 6, baseType: !18, size: 16)
+!18 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", file: !15, line: 49, baseType: !19)
+!19 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
+!20 = !{!21}
+!21 = !DILocalVariable(name: "u", arg: 1, scope: !7, file: !1, line: 9, type: !10)
+!22 = !DILocation(line: 10, column: 8, scope: !7)
+
+!26 = !{!27, !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C++ TBAA"}
+
+!29 = !{i32 1}
+
+; Domains and scopes which might alias
+!30 = !{!30}
+!31 = !{!31, !30}
+
+!32 = !{!32}
+!33 = !{!33, !32}




More information about the llvm-commits mailing list