[llvm] r346432 - [DAGCombine] Improve alias analysis for chain of independent stores.

Nirav Dave via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 8 11:14:21 PST 2018


Author: niravd
Date: Thu Nov  8 11:14:20 2018
New Revision: 346432

URL: http://llvm.org/viewvc/llvm-project?rev=346432&view=rev
Log:
[DAGCombine] Improve alias analysis for chain of independent stores.

FindBetterNeighborChains simultaneously improves the chain
dependencies of a chain of related stores, avoiding the generation of
extra token factors. For chains longer than the GatherAllAliasDepths,
stores further down in the chain will necessarily fail, which is a
potentially significant waste and prevents otherwise trivial parallelization.

This patch directly parallelizes the chains of stores before improving
each store. This generally improves DAG-level parallelism.

Reviewers: courbet, spatel, RKSimon, bogner, efriedma, craig.topper, rnk

Subscribers: sdardis, javed.absar, hiraditya, jrtc27, atanasyan, llvm-commits

Differential Revision: https://reviews.llvm.org/D53552

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll
    llvm/trunk/test/CodeGen/AArch64/ldst-opt.ll
    llvm/trunk/test/CodeGen/AArch64/swifterror.ll
    llvm/trunk/test/CodeGen/ARM/arm-storebytesmerge.ll
    llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll
    llvm/trunk/test/CodeGen/Mips/fastcc.ll
    llvm/trunk/test/CodeGen/SystemZ/pr36164.ll
    llvm/trunk/test/CodeGen/X86/stores-merging.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Nov  8 11:14:20 2018
@@ -20,6 +20,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
@@ -490,6 +491,10 @@ namespace {
     /// returns false.
     bool findBetterNeighborChains(StoreSDNode *St);
 
+    // Helper for findBetterNeighborChains. Walk up store chain add additional
+    // chained stores that do not overlap and can be parallelized.
+    bool parallelizeChainedStores(StoreSDNode *St);
+
     /// Holds a pointer to an LSBaseSDNode as well as information on where it
     /// is located in a sequence of memory operations connected by a chain.
     struct MemOpLink {
@@ -18905,6 +18910,11 @@ SDValue DAGCombiner::FindBetterChain(SDN
   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
 }
 
+// TODO: Replace with with std::monostate when we move to C++17.
+struct UnitT { } Unit;
+bool operator==(const UnitT &, const UnitT &) { return true; }
+bool operator!=(const UnitT &, const UnitT &) { return false; }
+
 // This function tries to collect a bunch of potentially interesting
 // nodes to improve the chains of, all at once. This might seem
 // redundant, as this function gets called when visiting every store
@@ -18917,13 +18927,22 @@ SDValue DAGCombiner::FindBetterChain(SDN
 // the nodes that will eventually be candidates, and then not be able
 // to go from a partially-merged state to the desired final
 // fully-merged state.
-bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
-  if (OptLevel == CodeGenOpt::None)
-    return false;
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+  SmallVector<StoreSDNode *, 8> ChainedStores;
+  StoreSDNode *STChain = St;
+  // Intervals records which offsets from BaseIndex have been covered. In
+  // the common case, every store writes to the immediately previous address
+  // space and thus merged with the previous interval at insertion time.
+
+  using IMap =
+      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+  IMap::Allocator A;
+  IMap Intervals(A);
 
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer.
-  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
 
   // We must have a base and an offset.
   if (!BasePtr.getBase().getNode())
@@ -18933,76 +18952,114 @@ bool DAGCombiner::findBetterNeighborChai
   if (BasePtr.getBase().isUndef())
     return false;
 
-  SmallVector<StoreSDNode *, 8> ChainedStores;
-  ChainedStores.push_back(St);
+  // Add ST's interval.
+  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
 
-  // Walk up the chain and look for nodes with offsets from the same
-  // base pointer. Stop when reaching an instruction with a different kind
-  // or instruction which has a different base pointer.
-  StoreSDNode *Index = St;
-  while (Index) {
+  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
     // If the chain has more than one use, then we can't reorder the mem ops.
-    if (Index != St && !SDValue(Index, 0)->hasOneUse())
+    if (!SDValue(Chain, 0)->hasOneUse())
       break;
-
-    if (Index->isVolatile() || Index->isIndexed())
+    if (Chain->isVolatile() || Chain->isIndexed())
       break;
 
     // Find the base pointer and offset for this memory node.
-    BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
-
+    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
     // Check that the base pointer is the same as the original one.
-    if (!BasePtr.equalBaseIndex(Ptr, DAG))
+    int64_t Offset;
+    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
+      break;
+    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+    // Make sure we don't overlap with other intervals by checking the ones to
+    // the left or right before inserting.
+    auto I = Intervals.find(Offset);
+    // If there's a next interval, we should end before it.
+    if (I != Intervals.end() && I.start() < (Offset + Length))
       break;
+    // If there's a previous interval, we should start after it.
+    if (I != Intervals.begin() && (--I).stop() <= Offset)
+      break;
+    Intervals.insert(Offset, Offset + Length, Unit);
 
-    // Walk up the chain to find the next store node, ignoring any
-    // intermediate loads. Any other kind of node will halt the loop.
-    SDNode *NextInChain = Index->getChain().getNode();
-    while (true) {
-      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
-        // We found a store node. Use it for the next iteration.
-        if (STn->isVolatile() || STn->isIndexed()) {
-          Index = nullptr;
-          break;
-        }
-        ChainedStores.push_back(STn);
-        Index = STn;
-        break;
-      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
-        NextInChain = Ldn->getChain().getNode();
-        continue;
-      } else {
-        Index = nullptr;
-        break;
-      }
-    }// end while
+    ChainedStores.push_back(Chain);
+    STChain = Chain;
   }
 
-  // At this point, ChainedStores lists all of the Store nodes
-  // reachable by iterating up through chain nodes matching the above
-  // conditions.  For each such store identified, try to find an
-  // earlier chain to attach the store to which won't violate the
-  // required ordering.
-  bool MadeChangeToSt = false;
-  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
-
-  for (StoreSDNode *ChainedStore : ChainedStores) {
-    SDValue Chain = ChainedStore->getChain();
-    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+  // If we didn't find a chained store, exit.
+  if (ChainedStores.size() == 0)
+    return false;
 
-    if (Chain != BetterChain) {
-      if (ChainedStore == St)
-        MadeChangeToSt = true;
-      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
-    }
-  }
+  // Improve all chained stores (St and ChainedStores members) starting from
+  // where the store chain ended and return single TokenFactor.
+  SDValue NewChain = STChain->getChain();
+  SmallVector<SDValue, 8> TFOps;
+  for (unsigned I = ChainedStores.size(); I;) {
+    StoreSDNode *S = ChainedStores[--I];
+    SDValue BetterChain = FindBetterChain(S, NewChain);
+    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
+        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
+    TFOps.push_back(SDValue(S, 0));
+    ChainedStores[I] = S;
+  }
+
+  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
+  SDValue BetterChain = FindBetterChain(St, NewChain);
+  SDValue NewST;
+  if (St->isTruncatingStore())
+    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
+                              St->getBasePtr(), St->getMemoryVT(),
+                              St->getMemOperand());
+  else
+    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
+                         St->getBasePtr(), St->getMemOperand());
+
+  TFOps.push_back(NewST);
+
+  // If we improved every element of TFOps, then we've lost the dependence on
+  // NewChain to successors of St and we need to add it back to TFOps. Do so at
+  // the beginning to keep relative order consistent with FindBetterChains.
+  auto hasImprovedChain = [&](SDValue ST) -> bool {
+    return ST->getOperand(0) != NewChain;
+  };
+  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
+  if (AddNewChain)
+    TFOps.insert(TFOps.begin(), NewChain);
+
+  SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+  CombineTo(St, TF);
+
+  AddToWorklist(STChain);
+  // Add TF operands worklist in reverse order.
+  for (auto I = TF->getNumOperands(); I;)
+    AddToWorklist(TF->getOperand(--I).getNode());
+  AddToWorklist(TF.getNode());
+  return true;
+}
+
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+  if (OptLevel == CodeGenOpt::None)
+    return false;
+
+  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
 
-  // Do all replacements after finding the replacements to make to avoid making
-  // the chains more complicated by introducing new TokenFactors.
-  for (auto Replacement : BetterChains)
-    replaceStoreChain(Replacement.first, Replacement.second);
+  // We must have a base and an offset.
+  if (!BasePtr.getBase().getNode())
+    return false;
+
+  // Do not handle stores to undef base pointers.
+  if (BasePtr.getBase().isUndef())
+    return false;
+
+  // Directly improve a chain of disjoint stores starting at St.
+  if (parallelizeChainedStores(St))
+    return true;
 
-  return MadeChangeToSt;
+  // Improve St's Chain..
+  SDValue BetterChain = FindBetterChain(St, St->getChain());
+  if (St->getChain() != BetterChain) {
+    replaceStoreChain(St, BetterChain);
+    return true;
+  }
+  return false;
 }
 
 /// This is the entry point for the file.

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll Thu Nov  8 11:14:20 2018
@@ -7,14 +7,13 @@ define void @fn9(i32* %a1, i32 %a2, i32
 ; CHECK-LABEL: fn9:
 ; 9th fixed argument
 ; CHECK: ldr {{w[0-9]+}}, [sp, #64]
-; CHECK: add [[ARGS:x[0-9]+]], sp, #72
-; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
+; CHECK-DAG: add [[ARGS:x[0-9]+]], sp, #72
 ; First vararg
-; CHECK: ldr {{w[0-9]+}}, [sp, #72]
+; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #72]
 ; Second vararg
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
+; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #80]
 ; Third vararg
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
+; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #88]
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4

Modified: llvm/trunk/test/CodeGen/AArch64/ldst-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ldst-opt.ll?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/ldst-opt.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/ldst-opt.ll Thu Nov  8 11:14:20 2018
@@ -1465,10 +1465,10 @@ entry:
 define void @merge_zr32_3vec(<3 x i32>* %p) {
 ; CHECK-LABEL: merge_zr32_3vec:
 ; CHECK: // %entry
-; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
 ; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
-; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
-; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
+; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #4]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}]
 ; CHECK-NEXT: ret
 entry:
   store <3 x i32> zeroinitializer, <3 x i32>* %p
@@ -1480,8 +1480,8 @@ define void @merge_zr32_4vec(<4 x i32>*
 ; CHECK-LABEL: merge_zr32_4vec:
 ; CHECK: // %entry
 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
-; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
 ; CHECK-NEXT: ret
 entry:
   store <4 x i32> zeroinitializer, <4 x i32>* %p
@@ -1505,8 +1505,8 @@ define void @merge_zr32_4vecf(<4 x float
 ; CHECK-LABEL: merge_zr32_4vecf:
 ; CHECK: // %entry
 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
-; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
 ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
 ; CHECK-NEXT: ret
 entry:
   store <4 x float> zeroinitializer, <4 x float>* %p
@@ -1589,8 +1589,8 @@ entry:
 define void @merge_zr64_3vec(<3 x i64>* %p) {
 ; CHECK-LABEL: merge_zr64_3vec:
 ; CHECK: // %entry
-; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
-; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16]
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #8]
+; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
 ; CHECK-NEXT: ret
 entry:
   store <3 x i64> zeroinitializer, <3 x i64>* %p

Modified: llvm/trunk/test/CodeGen/AArch64/swifterror.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/swifterror.ll?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/swifterror.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/swifterror.ll Thu Nov  8 11:14:20 2018
@@ -314,13 +314,12 @@ define float @foo_vararg(%swift_error**
 ; CHECK-APPLE-DAG: strb [[ID]], [x0, #8]
 
 ; First vararg
-; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8
 ; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16]
 ; Second vararg
-; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
+; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24]
 ; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16
 ; Third vararg
-; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
+; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32]
 
 ; CHECK-APPLE: mov x21, x0
 ; CHECK-APPLE-NOT: x21

Modified: llvm/trunk/test/CodeGen/ARM/arm-storebytesmerge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/arm-storebytesmerge.ll?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/arm-storebytesmerge.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/arm-storebytesmerge.ll Thu Nov  8 11:14:20 2018
@@ -8,101 +8,95 @@ target triple = "thumbv7em-arm-none-eabi
 define arm_aapcs_vfpcc void @test(i8* %v50) #0 {
 ; CHECK-LABEL: test:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    movw r1, #35722
-; CHECK-NEXT:    movt r1, #36236
-; CHECK-NEXT:    str.w r1, [r0, #394]
-; CHECK-NEXT:    movw r1, #36750
-; CHECK-NEXT:    movt r1, #37264
-; CHECK-NEXT:    str.w r1, [r0, #398]
-; CHECK-NEXT:    movw r1, #37778
-; CHECK-NEXT:    movt r1, #38292
-; CHECK-NEXT:    str.w r1, [r0, #402]
-; CHECK-NEXT:    movw r1, #38806
-; CHECK-NEXT:    movt r1, #39320
-; CHECK-NEXT:    str.w r1, [r0, #406]
-; CHECK-NEXT:    movw r1, #39834
-; CHECK-NEXT:    strh.w r1, [r0, #410]
-; CHECK-NEXT:    movw r1, #40348
-; CHECK-NEXT:    movt r1, #40862
-; CHECK-NEXT:    str.w r1, [r0, #412]
-; CHECK-NEXT:    movw r1, #41376
-; CHECK-NEXT:    movt r1, #41890
-; CHECK-NEXT:    str.w r1, [r0, #416]
-; CHECK-NEXT:    movw r1, #42404
-; CHECK-NEXT:    movt r1, #42918
-; CHECK-NEXT:    str.w r1, [r0, #420]
-; CHECK-NEXT:    movw r1, #43432
-; CHECK-NEXT:    movt r1, #43946
-; CHECK-NEXT:    str.w r1, [r0, #424]
-; CHECK-NEXT:    movw r1, #44460
-; CHECK-NEXT:    movt r1, #44974
-; CHECK-NEXT:    str.w r1, [r0, #428]
-; CHECK-NEXT:    movw r1, #45488
-; CHECK-NEXT:    strh.w r1, [r0, #432]
+; CHECK-NEXT:    movw r1, #65534
+; CHECK-NEXT:    strh.w r1, [r0, #510]
+; CHECK-NEXT:    movw r1, #64506
+; CHECK-NEXT:    movt r1, #65020
+; CHECK-NEXT:    str.w r1, [r0, #506]
+; CHECK-NEXT:    movw r1, #63478
+; CHECK-NEXT:    movt r1, #63992
+; CHECK-NEXT:    str.w r1, [r0, #502]
+; CHECK-NEXT:    movw r1, #62450
+; CHECK-NEXT:    movt r1, #62964
+; CHECK-NEXT:    str.w r1, [r0, #498]
+; CHECK-NEXT:    movw r1, #61422
+; CHECK-NEXT:    movt r1, #61936
+; CHECK-NEXT:    str.w r1, [r0, #494]
+; CHECK-NEXT:    movw r1, #60394
+; CHECK-NEXT:    movt r1, #60908
+; CHECK-NEXT:    str.w r1, [r0, #490]
+; CHECK-NEXT:    movw r1, #59366
+; CHECK-NEXT:    movt r1, #59880
+; CHECK-NEXT:    str.w r1, [r0, #486]
+; CHECK-NEXT:    movw r1, #58338
+; CHECK-NEXT:    movt r1, #58852
+; CHECK-NEXT:    str.w r1, [r0, #482]
+; CHECK-NEXT:    movw r1, #57310
+; CHECK-NEXT:    movt r1, #57824
+; CHECK-NEXT:    str.w r1, [r0, #478]
+; CHECK-NEXT:    movw r1, #56282
+; CHECK-NEXT:    movt r1, #56796
+; CHECK-NEXT:    str.w r1, [r0, #474]
+; CHECK-NEXT:    movw r1, #55254
+; CHECK-NEXT:    movt r1, #55768
+; CHECK-NEXT:    str.w r1, [r0, #470]
+; CHECK-NEXT:    movw r1, #54226
+; CHECK-NEXT:    movt r1, #54740
+; CHECK-NEXT:    str.w r1, [r0, #466]
+; CHECK-NEXT:    movw r1, #53198
+; CHECK-NEXT:    movt r1, #53712
+; CHECK-NEXT:    str.w r1, [r0, #462]
+; CHECK-NEXT:    movw r1, #52170
+; CHECK-NEXT:    movt r1, #52684
+; CHECK-NEXT:    str.w r1, [r0, #458]
+; CHECK-NEXT:    movw r1, #51142
+; CHECK-NEXT:    movt r1, #51656
+; CHECK-NEXT:    str.w r1, [r0, #454]
+; CHECK-NEXT:    movw r1, #50114
+; CHECK-NEXT:    movt r1, #50628
+; CHECK-NEXT:    str.w r1, [r0, #450]
+; CHECK-NEXT:    movw r1, #49086
+; CHECK-NEXT:    movt r1, #49600
+; CHECK-NEXT:    str.w r1, [r0, #446]
+; CHECK-NEXT:    movw r1, #48058
+; CHECK-NEXT:    movt r1, #48572
+; CHECK-NEXT:    str.w r1, [r0, #442]
+; CHECK-NEXT:    movw r1, #47030
+; CHECK-NEXT:    movt r1, #47544
+; CHECK-NEXT:    str.w r1, [r0, #438]
 ; CHECK-NEXT:    movw r1, #46002
 ; CHECK-NEXT:    movt r1, #46516
 ; CHECK-NEXT:    str.w r1, [r0, #434]
-; CHECK-NEXT:    movw r1, #47030
-; CHECK-NEXT:    strh.w r1, [r0, #438]
-; CHECK-NEXT:    movw r1, #47544
-; CHECK-NEXT:    movt r1, #48058
-; CHECK-NEXT:    str.w r1, [r0, #440]
-; CHECK-NEXT:    movw r1, #48572
-; CHECK-NEXT:    movt r1, #49086
-; CHECK-NEXT:    str.w r1, [r0, #444]
-; CHECK-NEXT:    movw r1, #49600
-; CHECK-NEXT:    strh.w r1, [r0, #448]
-; CHECK-NEXT:    movs r1, #194
-; CHECK-NEXT:    strb.w r1, [r0, #450]
-; CHECK-NEXT:    movw r1, #50371
-; CHECK-NEXT:    movt r1, #50885
-; CHECK-NEXT:    str.w r1, [r0, #451]
-; CHECK-NEXT:    movw r1, #51399
-; CHECK-NEXT:    movt r1, #51913
-; CHECK-NEXT:    str.w r1, [r0, #455]
-; CHECK-NEXT:    movw r1, #52427
-; CHECK-NEXT:    movt r1, #52941
-; CHECK-NEXT:    str.w r1, [r0, #459]
-; CHECK-NEXT:    movw r1, #53455
-; CHECK-NEXT:    movt r1, #53969
-; CHECK-NEXT:    str.w r1, [r0, #463]
-; CHECK-NEXT:    movw r1, #54483
-; CHECK-NEXT:    strh.w r1, [r0, #467]
-; CHECK-NEXT:    movw r1, #54997
-; CHECK-NEXT:    movt r1, #55511
-; CHECK-NEXT:    str.w r1, [r0, #469]
-; CHECK-NEXT:    movw r1, #56025
-; CHECK-NEXT:    movt r1, #56539
-; CHECK-NEXT:    str.w r1, [r0, #473]
-; CHECK-NEXT:    movw r1, #57053
-; CHECK-NEXT:    movt r1, #57567
-; CHECK-NEXT:    str.w r1, [r0, #477]
-; CHECK-NEXT:    movw r1, #58081
-; CHECK-NEXT:    movt r1, #58595
-; CHECK-NEXT:    str.w r1, [r0, #481]
-; CHECK-NEXT:    movw r1, #59109
-; CHECK-NEXT:    movt r1, #59623
-; CHECK-NEXT:    str.w r1, [r0, #485]
-; CHECK-NEXT:    movw r1, #60137
-; CHECK-NEXT:    strh.w r1, [r0, #489]
-; CHECK-NEXT:    movw r1, #60651
-; CHECK-NEXT:    movt r1, #61165
-; CHECK-NEXT:    str.w r1, [r0, #491]
-; CHECK-NEXT:    movw r1, #61679
-; CHECK-NEXT:    strh.w r1, [r0, #495]
-; CHECK-NEXT:    movw r1, #62193
-; CHECK-NEXT:    movt r1, #62707
-; CHECK-NEXT:    str.w r1, [r0, #497]
-; CHECK-NEXT:    movw r1, #63221
-; CHECK-NEXT:    movt r1, #63735
-; CHECK-NEXT:    str.w r1, [r0, #501]
-; CHECK-NEXT:    movw r1, #64249
-; CHECK-NEXT:    strh.w r1, [r0, #505]
-; CHECK-NEXT:    movs r1, #251
-; CHECK-NEXT:    strb.w r1, [r0, #507]
-; CHECK-NEXT:    movw r1, #65020
-; CHECK-NEXT:    movt r1, #65534
-; CHECK-NEXT:    str.w r1, [r0, #508]
+; CHECK-NEXT:    movw r1, #44974
+; CHECK-NEXT:    movt r1, #45488
+; CHECK-NEXT:    str.w r1, [r0, #430]
+; CHECK-NEXT:    movw r1, #43946
+; CHECK-NEXT:    movt r1, #44460
+; CHECK-NEXT:    str.w r1, [r0, #426]
+; CHECK-NEXT:    movw r1, #42918
+; CHECK-NEXT:    movt r1, #43432
+; CHECK-NEXT:    str.w r1, [r0, #422]
+; CHECK-NEXT:    movw r1, #41890
+; CHECK-NEXT:    movt r1, #42404
+; CHECK-NEXT:    str.w r1, [r0, #418]
+; CHECK-NEXT:    movw r1, #40862
+; CHECK-NEXT:    movt r1, #41376
+; CHECK-NEXT:    str.w r1, [r0, #414]
+; CHECK-NEXT:    movw r1, #39834
+; CHECK-NEXT:    movt r1, #40348
+; CHECK-NEXT:    str.w r1, [r0, #410]
+; CHECK-NEXT:    movw r1, #38806
+; CHECK-NEXT:    movt r1, #39320
+; CHECK-NEXT:    str.w r1, [r0, #406]
+; CHECK-NEXT:    movw r1, #37778
+; CHECK-NEXT:    movt r1, #38292
+; CHECK-NEXT:    str.w r1, [r0, #402]
+; CHECK-NEXT:    movw r1, #36750
+; CHECK-NEXT:    movt r1, #37264
+; CHECK-NEXT:    str.w r1, [r0, #398]
+; CHECK-NEXT:    movw r1, #35722
+; CHECK-NEXT:    movt r1, #36236
+; CHECK-NEXT:    str.w r1, [r0, #394]
 ; CHECK-NEXT:    bx lr
   %v190 = getelementptr inbounds i8, i8* %v50, i32 394
   store i8 -118, i8* %v190, align 1

Modified: llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll Thu Nov  8 11:14:20 2018
@@ -72,20 +72,27 @@ define void @aesea(<16 x i8>* %a0, <16 x
 ; CHECK-LABEL: aesea:
 ; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]
+
 ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
 ; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]
+
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]
+
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]]
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
 ; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]]
+
 ; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]
+
 ; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]]
@@ -160,14 +167,14 @@ define void @aesda(<16 x i8>* %a0, <16 x
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]
 ; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]]
 ; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}

Modified: llvm/trunk/test/CodeGen/Mips/fastcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/fastcc.ll?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/fastcc.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/fastcc.ll Thu Nov  8 11:14:20 2018
@@ -223,24 +223,24 @@ entry:
 define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline {
 entry:
 ; CHECK-LABEL: callee1:
-; CHECK-DAG: swc1  $f0
-; CHECK-DAG: swc1  $f1
-; CHECK-DAG: swc1  $f2
-; CHECK-DAG: swc1  $f3
-; CHECK-DAG: swc1  $f4
-; CHECK-DAG: swc1  $f5
-; CHECK-DAG: swc1  $f6
-; CHECK-DAG: swc1  $f7
-; CHECK-DAG: swc1  $f8
-; CHECK-DAG: swc1  $f9
-; CHECK-DAG: swc1  $f10
-; CHECK-DAG: swc1  $f11
-; CHECK-DAG: swc1  $f12
-; CHECK-DAG: swc1  $f13
-; CHECK-DAG: swc1  $f14
-; CHECK-DAG: swc1  $f15
-; CHECK-DAG: swc1  $f16
 ; CHECK-DAG: swc1  $f17
+; CHECK-DAG: swc1  $f16
+; CHECK-DAG: swc1  $f15
+; CHECK-DAG: swc1  $f14
+; CHECK-DAG: swc1  $f13
+; CHECK-DAG: swc1  $f12
+; CHECK-DAG: swc1  $f11
+; CHECK-DAG: swc1  $f10
+; CHECK-DAG: swc1  $f9
+; CHECK-DAG: swc1  $f8
+; CHECK-DAG: swc1  $f7
+; CHECK-DAG: swc1  $f6
+; CHECK-DAG: swc1  $f5
+; CHECK-DAG: swc1  $f4
+; CHECK-DAG: swc1  $f3
+; CHECK-DAG: swc1  $f2
+; CHECK-DAG: swc1  $f1
+; CHECK-DAG: swc1  $f0
 ; CHECK-DAG: swc1  $f18
 ; CHECK-DAG: swc1  $f19
 
@@ -330,7 +330,7 @@ entry:
 ; NOODDSPREG-DAG:    swc1    $f16, 32($[[R0]])
 ; NOODDSPREG-DAG:    swc1    $f18, 36($[[R0]])
 
-; NOODDSPREG-DAG:    lwc1    $[[F0:f[0-9]*[02468]]], 0($sp)
+; NOODDSPREG-DAG:    lwc1    $[[F0:f[0-9]*[02468]]], {{[0-9]+}}($sp)
 ; NOODDSPREG-DAG:    swc1    $[[F0]], 40($[[R0]])
 
   store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4

Modified: llvm/trunk/test/CodeGen/SystemZ/pr36164.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/pr36164.ll?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/pr36164.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/pr36164.ll Thu Nov  8 11:14:20 2018
@@ -15,54 +15,39 @@
 define void @main() local_unnamed_addr #0 {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
-; CHECK-NEXT:    .cfi_offset %r12, -64
-; CHECK-NEXT:    .cfi_offset %r13, -56
-; CHECK-NEXT:    .cfi_offset %r14, -48
-; CHECK-NEXT:    .cfi_offset %r15, -40
 ; CHECK-NEXT:    lhi %r0, 1
 ; CHECK-NEXT:    larl %r1, g_938
-; CHECK-NEXT:    lhi %r2, 2
-; CHECK-NEXT:    lhi %r3, 3
-; CHECK-NEXT:    lhi %r4, 0
-; CHECK-NEXT:    lhi %r5, 4
-; CHECK-NEXT:    larl %r14, g_11
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lhi %r3, 4
+; CHECK-NEXT:    larl %r4, g_11
 ; CHECK-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    strl %r0, g_73
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    strl %r0, g_69
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-DAG:     lghi %r13, 24
-; CHECK-DAG:     strl %r2, g_69
-; CHECK-DAG:     ag %r13, 0(%r1)
-; CHECK-NEXT:    lrl %r12, g_832
-; CHECK-NEXT:    strl %r3, g_69
-; CHECK-NEXT:    lrl %r12, g_832
-; CHECK-NEXT:    strl %r4, g_69
-; CHECK-NEXT:    lrl %r12, g_832
-; CHECK-NEXT:    strl %r0, g_69
-; CHECK-NEXT:    lrl %r12, g_832
 ; CHECK-NEXT:    strl %r2, g_69
-; CHECK-NEXT:    lrl %r12, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    lrl %r5, g_832
+; CHECK-NEXT:    agsi 0(%r1), 24
+; CHECK-NEXT:    lrl %r5, g_832
 ; CHECK-NEXT:    strl %r3, g_69
-; CHECK-NEXT:    stgrl %r13, g_938
-; CHECK-NEXT:    lrl %r13, g_832
-; CHECK-NEXT:    strl %r5, g_69
-; CHECK-NEXT:    mvi 0(%r14), 1
+; CHECK-NEXT:    mvi 0(%r4), 1
 ; CHECK-NEXT:    j .LBB0_1
   br label %1
 

Modified: llvm/trunk/test/CodeGen/X86/stores-merging.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stores-merging.ll?rev=346432&r1=346431&r2=346432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stores-merging.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stores-merging.ll Thu Nov  8 11:14:20 2018
@@ -13,9 +13,8 @@
 define void @redundant_stores_merging() {
 ; CHECK-LABEL: redundant_stores_merging:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movabsq $528280977409, %rax # imm = 0x7B00000001
+; CHECK-NEXT:    movabsq $1958505086977, %rax # imm = 0x1C800000001
 ; CHECK-NEXT:    movq %rax, e+{{.*}}(%rip)
-; CHECK-NEXT:    movl $456, e+{{.*}}(%rip) # imm = 0x1C8
 ; CHECK-NEXT:    retq
   store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
   store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4




More information about the llvm-commits mailing list