[llvm] r296476 - In visitSTORE, always use FindBetterChain, rather than only when UseAA is enabled.

Chandler Carruth via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 3 01:28:40 PST 2017


I also now have an over 20x compile time regression that this triggers.
Sadly my test case is still huge, but we can reduce it. Especially given
the crasher, i'm going to revert for now.

On Wed, Mar 1, 2017 at 12:48 AM Mikael Holmén via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Hi Nirav,
>
> The attached (llvm-stress generated and then bugpoint reduced) ll-file
> crashes llc with this commit:
>
> llc -march=x86-64 stress_reduced.ll
>
> gives
>
> llc: ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp:746: bool
> llvm::SelectionDAG::RemoveNodeFromCSEMaps(llvm::SDNode *): Assertion
> `N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"' failed.
> #0 0x0b161bcc llvm::sys::PrintStackTrace(llvm::raw_ostream&)
> /data/repo/llvm-patch/build-all-Debug/../lib/Support/Unix/Signals.inc:402:5
> #1 0x0b161dfe PrintStackTraceSignalHandler(void*)
> /data/repo/llvm-patch/build-all-Debug/../lib/Support/Unix/Signals.inc:466:1
> #2 0x0b15fdff llvm::sys::RunSignalHandlers()
> /data/repo/llvm-patch/build-all-Debug/../lib/Support/Signals.cpp:45:5
> #3 0x0b1625b1 SignalHandler(int)
> /data/repo/llvm-patch/build-all-Debug/../lib/Support/Unix/Signals.inc:256:1
> #4 0xf77c6410  0xffffe410 llvm::SelectionDAG::DeleteNode(llvm::SDNode*)
> #5 0xf77c6410
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAG.cpp:615:26
>
> #6 0xf77c6410 (anonymous
> namespace)::DAGCombiner::recursivelyDeleteUnusedNodes(llvm::SDNode*)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1260:5
> #7 0x0aecc079 (anonymous
> namespace)::DAGCombiner::Run(llvm::CombineLevel)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1302:9
> #8 0x0ad0cb1f llvm::SelectionDAG::Combine(llvm::CombineLevel,
> llvm::AAResults&, llvm::CodeGenOpt::Level)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/DAGCombiner.cpp:16066:3
> #9 0x0ad0c044 llvm::SelectionDAGISel::CodeGenAndEmitDAG()
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:871:7
> #10 0x0ad0bcd6
>
> llvm::SelectionDAGISel::SelectBasicBlock(llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::Instruction,
> true, false, void>, false, true>,
> llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::Instruction,
> true, false, void>, false, true>, bool&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:726:1
> #11 0x0af36f98
> llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1689:5
> #12 0x0af357a7
> llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:543:7
> #13 0x0af3558a (anonymous
> namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/Target/X86/X86ISelDAGToDAG.cpp:176:25
> #14 0x0af32754 llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/MachineFunctionPass.cpp:62:8
> #15 0x09936a8d llvm::FPPassManager::runOnFunction(llvm::Function&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1513:23
> #16 0x0a2c0401 llvm::FPPassManager::runOnModule(llvm::Module&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1534:16
> #17 0x0a7f6c23 (anonymous
> namespace)::MPPassManager::runOnModule(llvm::Module&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1590:23
> #18 0x0a7f6fca llvm::legacy::PassManagerImpl::run(llvm::Module&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1693:16
> #19 0x0a7f788c llvm::legacy::PassManager::run(llvm::Module&)
>
> /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1724:3
> #20 0x0a7f72dc compileModule(char**, llvm::LLVMContext&)
> /data/repo/llvm-patch/build-all-Debug/../tools/llc/llc.cpp:579:42
> #21 0x0a7f7ee6 main
> /data/repo/llvm-patch/build-all-Debug/../tools/llc/llc.cpp:331:13
> #22 0x0860a90b __libc_start_main
> /build/eglibc-X4bnBz/eglibc-2.19/csu/libc-start.c:321:0
>
> build-all-Debug/bin/llc(_ZN4llvm3sys15PrintStackTraceERNS_11raw_ostreamE+0x5c)[0xb161bcc]
> build-all-Debug/bin/llc[0xb161dfe]
> build-all-Debug/bin/llc(_ZN4llvm3sys17RunSignalHandlersEv+0xbf)[0xb15fdff]
> build-all-Debug/bin/llc[0xb1625b1]
> [0xf77c6410]
>
> build-all-Debug/bin/llc(_ZN4llvm12SelectionDAG10DeleteNodeEPNS_6SDNodeE+0x39)[0xaecc079]
> build-all-Debug/bin/llc[0xad0cb1f]
> build-all-Debug/bin/llc[0xad0c044]
>
> build-all-Debug/bin/llc(_ZN4llvm12SelectionDAG7CombineENS_12CombineLevelERNS_9AAResultsENS_10CodeGenOpt5LevelE+0x76)[0xad0bcd6]
>
> build-all-Debug/bin/llc(_ZN4llvm16SelectionDAGISel17CodeGenAndEmitDAGEv+0x17e8)[0xaf36f98]
>
> build-all-Debug/bin/llc(_ZN4llvm16SelectionDAGISel16SelectBasicBlockENS_14ilist_iteratorINS_12ilist_detail12node_optionsINS_11InstructionELb1ELb0EvEELb0ELb1EEES6_Rb+0x137)[0xaf357a7]
>
> build-all-Debug/bin/llc(_ZN4llvm16SelectionDAGISel20SelectAllBasicBlocksERKNS_8FunctionE+0x144a)[0xaf3558a]
>
> build-all-Debug/bin/llc(_ZN4llvm16SelectionDAGISel20runOnMachineFunctionERNS_15MachineFunctionE+0x734)[0xaf32754]
> build-all-Debug/bin/llc[0x9936a8d]
>
> build-all-Debug/bin/llc(_ZN4llvm19MachineFunctionPass13runOnFunctionERNS_8FunctionE+0x251)[0xa2c0401]
>
> build-all-Debug/bin/llc(_ZN4llvm13FPPassManager13runOnFunctionERNS_8FunctionE+0x1b3)[0xa7f6c23]
>
> build-all-Debug/bin/llc(_ZN4llvm13FPPassManager11runOnModuleERNS_6ModuleE+0xaa)[0xa7f6fca]
> build-all-Debug/bin/llc[0xa7f788c]
>
> build-all-Debug/bin/llc(_ZN4llvm6legacy15PassManagerImpl3runERNS_6ModuleE+0x14c)[0xa7f72dc]
>
> build-all-Debug/bin/llc(_ZN4llvm6legacy11PassManager3runERNS_6ModuleE+0x36)[0xa7f7ee6]
> build-all-Debug/bin/llc[0x860a90b]
> build-all-Debug/bin/llc(main+0x5b0)[0x86084e0]
> /lib/i386-linux-gnu/libc.so.6(__libc_start_main+0xf3)[0xf7493af3]
> Stack dump:
> 0.      Program arguments: build-all-Debug/bin/llc -march=x86-64
> stress_reduced.ll
> 1.      Running pass 'Function Pass Manager' on module 'stress_reduced.ll'.
> 2.      Running pass 'X86 DAG->DAG Instruction Selection' on function
> '@autogen_SD1794'
> Abort
>
> Regards,
> Mikael
>
>
> On 02/28/2017 03:24 PM, Nirav Dave via llvm-commits wrote:
> > Author: niravd
> > Date: Tue Feb 28 08:24:15 2017
> > New Revision: 296476
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=296476&view=rev
> > Log:
> > In visitSTORE, always use FindBetterChain, rather than only when UseAA
> is enabled.
> >
> >     Recommiting after fixup of 32-bit aliasing sign offset bug in
> DAGCombiner.
> >
> >     * Simplify Consecutive Merge Store Candidate Search
> >
> >     Now that address aliasing is much less conservative, push through
> >     simplified store merging search and chain alias analysis which only
> >     checks for parallel stores through the chain subgraph. This is
> cleaner
> >     as the separation of non-interfering loads/stores from the
> >     store-merging logic.
> >
> >     When merging stores search up the chain through a single load, and
> >     finds all possible stores by looking down from through a load and a
> >     TokenFactor to all stores visited.
> >
> >     This improves the quality of the output SelectionDAG and the output
> >     Codegen (save perhaps for some ARM cases where we correctly
> constructs
> >     wider loads, but then promotes them to float operations which appear
> >     but requires more expensive constant generation).
> >
> >     Some minor peephole optimizations to deal with improved SubDAG
> shapes (listed below)
> >
> >     Additional Minor Changes:
> >
> >       1. Finishes removing unused AliasLoad code
> >
> >       2. Unifies the chain aggregation in the merged stores across code
> >          paths
> >
> >       3. Re-add the Store node to the worklist after calling
> >          SimplifyDemandedBits.
> >
> >       4. Increase GatherAllAliasesMaxDepth from 6 to 18. That number is
> >          arbitrary, but seems sufficient to not cause regressions in
> >          tests.
> >
> >       5. Remove Chain dependencies of Memory operations on CopyfromReg
> >          nodes as these are captured by data dependence
> >
> >       6. Forward loads-store values through tokenfactors containing
> >           {CopyToReg,CopyFromReg} Values.
> >
> >       7. Peephole to convert buildvector of extract_vector_elt to
> >          extract_subvector if possible (see
> >          CodeGen/AArch64/store-merge.ll)
> >
> >       8. Store merging for the ARM target is restricted to 32-bit as
> >          some in some contexts invalid 64-bit operations are being
> >          generated. This can be removed once appropriate checks are
> >          added.
> >
> >     This finishes the change Matt Arsenault started in r246307 and
> >     jyknight's original patch.
> >
> >     Many tests required some changes as memory operations are now
> >     reorderable, improving load-store forwarding. One test in
> >     particular is worth noting:
> >
> >       CodeGen/PowerPC/ppc64-align-long-double.ll - Improved load-store
> >       forwarding converts a load-store pair into a parallel store and
> >       a memory-realized bitcast of the same value. However, because we
> >       lose the sharing of the explicit and implicit store values we
> >       must create another local store. A similar transformation
> >       happens before SelectionDAG as well.
> >
> >     Reviewers: arsenm, hfinkel, tstellarAMD, jyknight, nhaehnle
> >
> > Removed:
> >     llvm/trunk/test/CodeGen/X86/combiner-aa-0.ll
> >     llvm/trunk/test/CodeGen/X86/combiner-aa-1.ll
> >     llvm/trunk/test/CodeGen/X86/pr18023.ll
> > Modified:
> >     llvm/trunk/include/llvm/Target/TargetLowering.h
> >     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> >     llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
> >     llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> >     llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> >     llvm/trunk/test/CodeGen/AArch64/argument-blocks.ll
> >     llvm/trunk/test/CodeGen/AArch64/arm64-abi.ll
> >     llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
> >     llvm/trunk/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
> >     llvm/trunk/test/CodeGen/AArch64/merge-store.ll
> >     llvm/trunk/test/CodeGen/AArch64/vector_merge_dep_check.ll
> >     llvm/trunk/test/CodeGen/AMDGPU/debugger-insert-nops.ll
> >     llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
> >     llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll
> >     llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll
> >     llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
> >     llvm/trunk/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
> >     llvm/trunk/test/CodeGen/ARM/alloc-no-stack-realign.ll
> >     llvm/trunk/test/CodeGen/ARM/gpr-paired-spill.ll
> >     llvm/trunk/test/CodeGen/ARM/ifcvt10.ll
> >     llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
> >     llvm/trunk/test/CodeGen/ARM/static-addr-hoisting.ll
> >     llvm/trunk/test/CodeGen/BPF/undef.ll
> >     llvm/trunk/test/CodeGen/MSP430/Inst16mm.ll
> >     llvm/trunk/test/CodeGen/Mips/cconv/arguments-float.ll
> >     llvm/trunk/test/CodeGen/Mips/cconv/arguments-varargs.ll
> >     llvm/trunk/test/CodeGen/Mips/fastcc.ll
> >     llvm/trunk/test/CodeGen/Mips/load-store-left-right.ll
> >     llvm/trunk/test/CodeGen/Mips/micromips-li.ll
> >     llvm/trunk/test/CodeGen/Mips/mips64-f128-call.ll
> >     llvm/trunk/test/CodeGen/Mips/mips64-f128.ll
> >     llvm/trunk/test/CodeGen/Mips/mno-ldc1-sdc1.ll
> >     llvm/trunk/test/CodeGen/Mips/msa/f16-llvm-ir.ll
> >     llvm/trunk/test/CodeGen/Mips/msa/i5_ld_st.ll
> >     llvm/trunk/test/CodeGen/Mips/o32_cc_byval.ll
> >     llvm/trunk/test/CodeGen/Mips/o32_cc_vararg.ll
> >     llvm/trunk/test/CodeGen/PowerPC/anon_aggr.ll
> >     llvm/trunk/test/CodeGen/PowerPC/complex-return.ll
> >     llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll
> >     llvm/trunk/test/CodeGen/PowerPC/ppc64-align-long-double.ll
> >     llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll
> >     llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll
> >     llvm/trunk/test/CodeGen/SystemZ/unaligned-01.ll
> >     llvm/trunk/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
> >     llvm/trunk/test/CodeGen/Thumb/stack-access.ll
> >     llvm/trunk/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
> >     llvm/trunk/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
> >     llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
> >     llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
> >     llvm/trunk/test/CodeGen/X86/chain_order.ll
> >     llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
> >     llvm/trunk/test/CodeGen/X86/copy-eflags.ll
> >     llvm/trunk/test/CodeGen/X86/dag-merge-fast-accesses.ll
> >     llvm/trunk/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
> >
>  llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
> >     llvm/trunk/test/CodeGen/X86/i256-add.ll
> >     llvm/trunk/test/CodeGen/X86/i386-shrink-wrapping.ll
> >     llvm/trunk/test/CodeGen/X86/illegal-bitfield-loadstore.ll
> >     llvm/trunk/test/CodeGen/X86/live-range-nosubreg.ll
> >     llvm/trunk/test/CodeGen/X86/longlong-deadload.ll
> >     llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
> >     llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll
> >     llvm/trunk/test/CodeGen/X86/merge-store-partially-alias-loads.ll
> >     llvm/trunk/test/CodeGen/X86/split-store.ll
> >     llvm/trunk/test/CodeGen/X86/stores-merging.ll
> >     llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
> >     llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
> >     llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
> >     llvm/trunk/test/CodeGen/X86/vectorcall.ll
> >     llvm/trunk/test/CodeGen/X86/win32-eh.ll
> >     llvm/trunk/test/CodeGen/XCore/varargs.ll
> >
> > Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=296476&r1=296475&r2=296476&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
> > +++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue Feb 28 08:24:15
> 2017
> > @@ -363,6 +363,9 @@ public:
> >      return false;
> >    }
> >
> > +  /// Returns if it's reasonable to merge stores to MemVT size.
> > +  virtual bool canMergeStoresTo(EVT MemVT) const { return true; }
> > +
> >    /// \brief Return true if it is cheap to speculate a call to
> intrinsic cttz.
> >    virtual bool isCheapToSpeculateCttz() const {
> >      return false;
> >
> > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=296476&r1=296475&r2=296476&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Feb 28
> 08:24:15 2017
> > @@ -53,10 +53,6 @@ STATISTIC(SlicedLoads, "Number of load s
> >
> >  namespace {
> >    static cl::opt<bool>
> > -    CombinerAA("combiner-alias-analysis", cl::Hidden,
> > -               cl::desc("Enable DAG combiner alias-analysis
> heuristics"));
> > -
> > -  static cl::opt<bool>
> >      CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
> >                 cl::desc("Enable DAG combiner's use of IR alias
> analysis"));
> >
> > @@ -419,15 +415,12 @@ namespace {
> >      /// Holds a pointer to an LSBaseSDNode as well as information on
> where it
> >      /// is located in a sequence of memory operations connected by a
> chain.
> >      struct MemOpLink {
> > -      MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
> > -      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
> > +      MemOpLink(LSBaseSDNode *N, int64_t Offset)
> > +          : MemNode(N), OffsetFromBase(Offset) {}
> >        // Ptr to the mem node.
> >        LSBaseSDNode *MemNode;
> >        // Offset from the base ptr.
> >        int64_t OffsetFromBase;
> > -      // What is the sequence number of this mem node.
> > -      // Lowest mem operand in the DAG starts at zero.
> > -      unsigned SequenceNum;
> >      };
> >
> >      /// This is a helper function for visitMUL to check the
> profitability
> > @@ -438,12 +431,6 @@ namespace {
> >                                       SDValue &AddNode,
> >                                       SDValue &ConstNode);
> >
> > -    /// This is a helper function for MergeStoresOfConstantsOrVecElts.
> Returns a
> > -    /// constant build_vector of the stored constant values in Stores.
> > -    SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc
> &SL,
> > -                                         ArrayRef<MemOpLink> Stores,
> > -                                         SmallVectorImpl<SDValue>
> &Chains,
> > -                                         EVT Ty) const;
> >
> >      /// This is a helper function for visitAND and visitZERO_EXTEND.
> Returns
> >      /// true if the (and (load x) c) pattern matches an extload.  ExtVT
> returns
> > @@ -457,18 +444,15 @@ namespace {
> >      /// This is a helper function for MergeConsecutiveStores. When the
> source
> >      /// elements of the consecutive stores are all constants or all
> extracted
> >      /// vector elements, try to merge them into one larger store.
> > -    /// \return number of stores that were merged into a merged store
> (always
> > -    /// a prefix of \p StoreNode).
> > -    bool MergeStoresOfConstantsOrVecElts(
> > -        SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned
> NumStores,
> > -        bool IsConstantSrc, bool UseVector);
> > +    /// \return True if a merged store was created.
> > +    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink>
> &StoreNodes,
> > +                                         EVT MemVT, unsigned NumStores,
> > +                                         bool IsConstantSrc, bool
> UseVector);
> >
> >      /// This is a helper function for MergeConsecutiveStores.
> >      /// Stores that may be merged are placed in StoreNodes.
> > -    /// Loads that may alias with those stores are placed in
> AliasLoadNodes.
> > -    void getStoreMergeAndAliasCandidates(
> > -        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
> > -        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
> > +    void getStoreMergeCandidates(StoreSDNode *St,
> > +                                 SmallVectorImpl<MemOpLink>
> &StoreNodes);
> >
> >      /// Helper function for MergeConsecutiveStores. Checks if
> >      /// Candidate stores have indirect dependency through their
> > @@ -480,8 +464,7 @@ namespace {
> >      /// This optimization uses wide integers or vectors when possible.
> >      /// \return number of stores that were merged into a merged store
> (the
> >      /// affected nodes are stored as a prefix in \p StoreNodes).
> > -    bool MergeConsecutiveStores(StoreSDNode *N,
> > -                                SmallVectorImpl<MemOpLink> &StoreNodes);
> > +    bool MergeConsecutiveStores(StoreSDNode *N);
> >
> >      /// \brief Try to transform a truncation where C is a constant:
> >      ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
> > @@ -1584,7 +1567,7 @@ SDValue DAGCombiner::visitTokenFactor(SD
> >    }
> >
> >    SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
> > -  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
> > +  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
> >    SmallPtrSet<SDNode*, 16> SeenOps;
> >    bool Changed = false;             // If we should replace this token
> factor.
> >
> > @@ -1628,6 +1611,86 @@ SDValue DAGCombiner::visitTokenFactor(SD
> >      }
> >    }
> >
> > +  // Remove Nodes that are chained to another node in the list. Do so
> > +  // by walking up chains breath-first stopping when we've seen
> > +  // another operand. In general we must climb to the EntryNode, but we
> can exit
> > +  // early if we find all remaining work is associated with just one
> operand as
> > +  // no further pruning is possible.
> > +
> > +  // List of nodes to search through and original Ops from which they
> originate.
> > +  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
> > +  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
> > +  SmallPtrSet<SDNode *, 16> SeenChains;
> > +  bool DidPruneOps = false;
> > +
> > +  unsigned NumLeftToConsider = 0;
> > +  for (const SDValue &Op : Ops) {
> > +    Worklist.push_back(std::make_pair(Op.getNode(),
> NumLeftToConsider++));
> > +    OpWorkCount.push_back(1);
> > +  }
> > +
> > +  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned
> OpNumber) {
> > +    // If this is an Op, we can remove the op from the list. Remark any
> > +    // search associated with it as from the current OpNumber.
> > +    if (SeenOps.count(Op) != 0) {
> > +      Changed = true;
> > +      DidPruneOps = true;
> > +      unsigned OrigOpNumber = 0;
> > +      while (Ops[OrigOpNumber].getNode() != Op && OrigOpNumber <
> Ops.size())
> > +        OrigOpNumber++;
> > +      assert((OrigOpNumber != Ops.size()) &&
> > +             "expected to find TokenFactor Operand");
> > +      // Re-mark worklist from OrigOpNumber to OpNumber
> > +      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
> > +        if (Worklist[i].second == OrigOpNumber) {
> > +          Worklist[i].second = OpNumber;
> > +        }
> > +      }
> > +      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
> > +      OpWorkCount[OrigOpNumber] = 0;
> > +      NumLeftToConsider--;
> > +    }
> > +    // Add if it's a new chain
> > +    if (SeenChains.insert(Op).second) {
> > +      OpWorkCount[OpNumber]++;
> > +      Worklist.push_back(std::make_pair(Op, OpNumber));
> > +    }
> > +  };
> > +
> > +  for (unsigned i = 0; i < Worklist.size(); ++i) {
> > +    // We need at least be consider at least 2 Ops to prune.
> > +    if (NumLeftToConsider <= 1)
> > +      break;
> > +    auto CurNode = Worklist[i].first;
> > +    auto CurOpNumber = Worklist[i].second;
> > +    assert((OpWorkCount[CurOpNumber] > 0) &&
> > +           "Node should not appear in worklist");
> > +    switch (CurNode->getOpcode()) {
> > +    case ISD::EntryToken:
> > +      // Hitting EntryToken is the only way for the search to terminate
> without
> > +      // hitting
> > +      // another operand's search. Prevent us from marking this operand
> > +      // considered.
> > +      NumLeftToConsider++;
> > +      break;
> > +    case ISD::TokenFactor:
> > +      for (const SDValue &Op : CurNode->op_values())
> > +        AddToWorklist(i, Op.getNode(), CurOpNumber);
> > +      break;
> > +    case ISD::CopyFromReg:
> > +    case ISD::CopyToReg:
> > +      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
> > +      break;
> > +    default:
> > +      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
> > +        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
> > +      break;
> > +    }
> > +    OpWorkCount[CurOpNumber]--;
> > +    if (OpWorkCount[CurOpNumber] == 0)
> > +      NumLeftToConsider--;
> > +  }
> > +
> >    SDValue Result;
> >
> >    // If we've changed things around then replace token factor.
> > @@ -1636,15 +1699,22 @@ SDValue DAGCombiner::visitTokenFactor(SD
> >        // The entry token is the only possible outcome.
> >        Result = DAG.getEntryNode();
> >      } else {
> > -      // New and improved token factor.
> > -      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
> > +      if (DidPruneOps) {
> > +        SmallVector<SDValue, 8> PrunedOps;
> > +        //
> > +        for (const SDValue &Op : Ops) {
> > +          if (SeenChains.count(Op.getNode()) == 0)
> > +            PrunedOps.push_back(Op);
> > +        }
> > +        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
> PrunedOps);
> > +      } else {
> > +        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
> Ops);
> > +      }
> >      }
> >
> > -    // Add users to worklist if AA is enabled, since it may introduce
> > -    // a lot of new chained token factors while removing memory deps.
> > -    bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> > -      : DAG.getSubtarget().useAA();
> > -    return CombineTo(N, Result, UseAA /*add to worklist*/);
> > +    // Add users to worklist, since we may introduce a lot of new
> > +    // chained token factors while removing memory deps.
> > +    return CombineTo(N, Result, true /*add to worklist*/);
> >    }
> >
> >    return Result;
> > @@ -6583,6 +6653,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNo
> >    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
> Chains);
> >    SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
> >
> > +  // Simplify TF.
> > +  AddToWorklist(NewChain.getNode());
> > +
> >    CombineTo(N, NewValue);
> >
> >    // Replace uses of the original load (before extension)
> > @@ -10761,11 +10834,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N
> >    // TODO: Handle TRUNCSTORE/LOADEXT
> >    if (OptLevel != CodeGenOpt::None &&
> >        ISD::isNormalLoad(N) && !LD->isVolatile()) {
> > +    // We can forward a direct store or a store off of a tokenfactor.
> >      if (ISD::isNON_TRUNCStore(Chain.getNode())) {
> >        StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
> >        if (PrevST->getBasePtr() == Ptr &&
> >            PrevST->getValue().getValueType() == N->getValueType(0))
> > -      return CombineTo(N, Chain.getOperand(1), Chain);
> > +        return CombineTo(N, PrevST->getOperand(1), Chain);
> >      }
> >    }
> >
> > @@ -10783,14 +10857,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N
> >      }
> >    }
> >
> > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> > -                                                  :
> DAG.getSubtarget().useAA();
> > -#ifndef NDEBUG
> > -  if (CombinerAAOnlyFunc.getNumOccurrences() &&
> > -      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
> > -    UseAA = false;
> > -#endif
> > -  if (UseAA && LD->isUnindexed()) {
> > +  if (LD->isUnindexed()) {
> >      // Walk up chain skipping non-aliasing memory nodes.
> >      SDValue BetterChain = FindBetterChain(N, Chain);
> >
> > @@ -11372,6 +11439,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N)
> >    SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
> >                                ArgChains);
> >    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
> > +  AddToWorklist(Chain.getNode());
> >    return true;
> >  }
> >
> > @@ -11765,20 +11833,6 @@ bool DAGCombiner::isMulAddWithConstProfi
> >    return false;
> >  }
> >
> > -SDValue DAGCombiner::getMergedConstantVectorStore(
> > -    SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
> > -    SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
> > -  SmallVector<SDValue, 8> BuildVector;
> > -
> > -  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
> > -    StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
> > -    Chains.push_back(St->getChain());
> > -    BuildVector.push_back(St->getValue());
> > -  }
> > -
> > -  return DAG.getBuildVector(Ty, SL, BuildVector);
> > -}
> > -
> >  bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
> >                    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
> >                    unsigned NumStores, bool IsConstantSrc, bool
> UseVector) {
> > @@ -11787,22 +11841,8 @@ bool DAGCombiner::MergeStoresOfConstants
> >      return false;
> >
> >    int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
> > -  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> > -  unsigned LatestNodeUsed = 0;
> > -
> > -  for (unsigned i=0; i < NumStores; ++i) {
> > -    // Find a chain for the new wide-store operand. Notice that some
> > -    // of the store nodes that we found may not be selected for
> inclusion
> > -    // in the wide store. The chain we use needs to be the chain of the
> > -    // latest store node which is *used* and replaced by the wide store.
> > -    if (StoreNodes[i].SequenceNum <
> StoreNodes[LatestNodeUsed].SequenceNum)
> > -      LatestNodeUsed = i;
> > -  }
> > -
> > -  SmallVector<SDValue, 8> Chains;
> >
> >    // The latest Node in the DAG.
> > -  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
> >    SDLoc DL(StoreNodes[0].MemNode);
> >
> >    SDValue StoredVal;
> > @@ -11818,7 +11858,18 @@ bool DAGCombiner::MergeStoresOfConstants
> >      assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
> >
> >      if (IsConstantSrc) {
> > -      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes,
> Chains, Ty);
> > +      SmallVector<SDValue, 8> BuildVector;
> > +      for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
> > +        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
> > +        SDValue Val = St->getValue();
> > +        if (MemVT.getScalarType().isInteger())
> > +          if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
> > +            Val = DAG.getConstant(
> > +
> (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
> > +                SDLoc(CFP), MemVT);
> > +        BuildVector.push_back(Val);
> > +      }
> > +      StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
> >      } else {
> >        SmallVector<SDValue, 8> Ops;
> >        for (unsigned i = 0; i < NumStores; ++i) {
> > @@ -11828,7 +11879,6 @@ bool DAGCombiner::MergeStoresOfConstants
> >          if (Val.getValueType() != MemVT)
> >            return false;
> >          Ops.push_back(Val);
> > -        Chains.push_back(St->getChain());
> >        }
> >
> >        // Build the extracted vector elements back into a vector.
> > @@ -11848,7 +11898,6 @@ bool DAGCombiner::MergeStoresOfConstants
> >      for (unsigned i = 0; i < NumStores; ++i) {
> >        unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
> >        StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
> > -      Chains.push_back(St->getChain());
> >
> >        SDValue Val = St->getValue();
> >        StoreInt <<= ElementSizeBytes * 8;
> > @@ -11866,54 +11915,36 @@ bool DAGCombiner::MergeStoresOfConstants
> >      StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
> >    }
> >
> > -  assert(!Chains.empty());
> > +  SmallVector<SDValue, 8> Chains;
> > +
> > +  // Gather all Chains we're inheriting. As generally all chains are
> > +  // equal, do minor check to remove obvious redundancies.
> > +  Chains.push_back(StoreNodes[0].MemNode->getChain());
> > +  for (unsigned i = 1; i < NumStores; ++i)
> > +    if (StoreNodes[0].MemNode->getChain() !=
> StoreNodes[i].MemNode->getChain())
> > +      Chains.push_back(StoreNodes[i].MemNode->getChain());
> >
> > +  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> >    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
> Chains);
> >    SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
> >                                    FirstInChain->getBasePtr(),
> >                                    FirstInChain->getPointerInfo(),
> >                                    FirstInChain->getAlignment());
> >
> > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> > -                                                  :
> DAG.getSubtarget().useAA();
> > -  if (UseAA) {
> > -    // Replace all merged stores with the new store.
> > -    for (unsigned i = 0; i < NumStores; ++i)
> > -      CombineTo(StoreNodes[i].MemNode, NewStore);
> > -  } else {
> > -    // Replace the last store with the new store.
> > -    CombineTo(LatestOp, NewStore);
> > -    // Erase all other stores.
> > -    for (unsigned i = 0; i < NumStores; ++i) {
> > -      if (StoreNodes[i].MemNode == LatestOp)
> > -        continue;
> > -      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> > -      // ReplaceAllUsesWith will replace all uses that existed when it
> was
> > -      // called, but graph optimizations may cause new ones to appear.
> For
> > -      // example, the case in pr14333 looks like
> > -      //
> > -      //  St's chain -> St -> another store -> X
> > -      //
> > -      // And the only difference from St to the other store is the
> chain.
> > -      // When we change it's chain to be St's chain they become
> identical,
> > -      // get CSEed and the net result is that X is now a use of St.
> > -      // Since we know that St is redundant, just iterate.
> > -      while (!St->use_empty())
> > -        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
> > -      deleteAndRecombine(St);
> > -    }
> > -  }
> > +  // Replace all merged stores with the new store.
> > +  for (unsigned i = 0; i < NumStores; ++i)
> > +    CombineTo(StoreNodes[i].MemNode, NewStore);
> >
> > -  StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
> > +  AddToWorklist(NewChain.getNode());
> >    return true;
> >  }
> >
> > -void DAGCombiner::getStoreMergeAndAliasCandidates(
> > -    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
> > -    SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
> > +void DAGCombiner::getStoreMergeCandidates(
> > +    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
> >    // This holds the base pointer, index, and the offset in bytes from
> the base
> >    // pointer.
> >    BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(),
> DAG);
> > +  EVT MemVT = St->getMemoryVT();
> >
> >    // We must have a base and an offset.
> >    if (!BasePtr.Base.getNode())
> > @@ -11923,104 +11954,70 @@ void DAGCombiner::getStoreMergeAndAliasC
> >    if (BasePtr.Base.isUndef())
> >      return;
> >
> > -  // Walk up the chain and look for nodes with offsets from the same
> > -  // base pointer. Stop when reaching an instruction with a different
> kind
> > -  // or instruction which has a different base pointer.
> > -  EVT MemVT = St->getMemoryVT();
> > -  unsigned Seq = 0;
> > -  StoreSDNode *Index = St;
> > -
> > -
> > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> > -                                                  :
> DAG.getSubtarget().useAA();
> > -
> > -  if (UseAA) {
> > -    // Look at other users of the same chain. Stores on the same chain
> do not
> > -    // alias. If combiner-aa is enabled, non-aliasing stores are
> canonicalized
> > -    // to be on the same chain, so don't bother looking at adjacent
> chains.
> > -
> > -    SDValue Chain = St->getChain();
> > -    for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E;
> ++I) {
> > -      if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
> > -        if (I.getOperandNo() != 0)
> > -          continue;
> > -
> > -        if (OtherST->isVolatile() || OtherST->isIndexed())
> > -          continue;
> > -
> > -        if (OtherST->getMemoryVT() != MemVT)
> > -          continue;
> > -
> > -        BaseIndexOffset Ptr =
> BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
> > -
> > -        if (Ptr.equalBaseIndex(BasePtr))
> > -          StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
> > -      }
> > -    }
> > -
> > -    return;
> > -  }
> > -
> > -  while (Index) {
> > -    // If the chain has more than one use, then we can't reorder the
> mem ops.
> > -    if (Index != St && !SDValue(Index, 0)->hasOneUse())
> > -      break;
> > -
> > -    // Find the base pointer and offset for this memory node.
> > -    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(),
> DAG);
> > -
> > -    // Check that the base pointer is the same as the original one.
> > -    if (!Ptr.equalBaseIndex(BasePtr))
> > -      break;
> > +  // We looking for a root node which is an ancestor to all mergable
> > +  // stores. We search up through a load, to our root and then down
> > +  // through all children. For instance we will find Store{1,2,3} if
> > +  // St is Store1, Store2. or Store3 where the root is not a load
> > +  // which always true for nonvolatile ops. TODO: Expand
> > +  // the search to find all valid candidates through multiple layers of
> loads.
> > +  //
> > +  // Root
> > +  // |-------|-------|
> > +  // Load    Load    Store3
> > +  // |       |
> > +  // Store1   Store2
> > +  //
> > +  // FIXME: We should be able to climb and
> > +  // descend TokenFactors to find candidates as well.
> >
> > -    // The memory operands must not be volatile.
> > -    if (Index->isVolatile() || Index->isIndexed())
> > -      break;
> > +  SDNode *RootNode = (St->getChain()).getNode();
> >
> > -    // No truncation.
> > -    if (Index->isTruncatingStore())
> > -      break;
> > +  // Set of Parents of Candidates
> > +  std::set<SDNode *> CandidateParents;
> >
> > -    // The stored memory type must be the same.
> > -    if (Index->getMemoryVT() != MemVT)
> > -      break;
> > -
> > -    // We do not allow under-aligned stores in order to prevent
> > -    // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
> > -    // be irrelevant here; what MATTERS is that we not move memory
> > -    // operations that potentially overlap past each-other.
> > -    if (Index->getAlignment() < MemVT.getStoreSize())
> > -      break;
> > +  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
> > +    RootNode = Ldn->getChain().getNode();
> > +    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I !=
> E; ++I)
> > +      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down
> chain
> > +        CandidateParents.insert(*I);
> > +  } else
> > +    CandidateParents.insert(RootNode);
> >
> > -    // We found a potential memory operand to merge.
> > -    StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
> > +  bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
> > +  bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
> > +                       isa<ConstantFPSDNode>(St->getValue());
> > +  bool IsExtractVecSrc =
> > +      (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
> > +       St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
> > +  auto CorrectValueKind = [&](StoreSDNode *Other) -> bool {
> > +    if (IsLoadSrc)
> > +      return isa<LoadSDNode>(Other->getValue());
> > +    if (IsConstantSrc)
> > +      return (isa<ConstantSDNode>(Other->getValue()) ||
> > +              isa<ConstantFPSDNode>(Other->getValue()));
> > +    if (IsExtractVecSrc)
> > +      return (Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT
> ||
> > +              Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
> > +    return false;
> > +  };
> >
> > -    // Find the next memory operand in the chain. If the next operand
> in the
> > -    // chain is a store then move up and continue the scan with the next
> > -    // memory operand. If the next operand is a load save it and use
> alias
> > -    // information to check if it interferes with anything.
> > -    SDNode *NextInChain = Index->getChain().getNode();
> > -    while (1) {
> > -      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
> > -        // We found a store node. Use it for the next iteration.
> > -        Index = STn;
> > -        break;
> > -      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
> > -        if (Ldn->isVolatile()) {
> > -          Index = nullptr;
> > -          break;
> > +  // check all parents of mergable children
> > +  for (auto P = CandidateParents.begin(); P != CandidateParents.end();
> ++P)
> > +    for (auto I = (*P)->use_begin(), E = (*P)->use_end(); I != E; ++I)
> > +      if (I.getOperandNo() == 0)
> > +        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
> > +          if (OtherST->isVolatile() || OtherST->isIndexed())
> > +            continue;
> > +          // We can merge constant floats to equivalent integers
> > +          if (OtherST->getMemoryVT() != MemVT)
> > +            if (!(MemVT.isInteger() &&
> MemVT.bitsEq(OtherST->getMemoryVT()) &&
> > +                  isa<ConstantFPSDNode>(OtherST->getValue())))
> > +              continue;
> > +          BaseIndexOffset Ptr =
> > +              BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
> > +          if (Ptr.equalBaseIndex(BasePtr) && CorrectValueKind(OtherST))
> > +            StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
> >          }
> > -
> > -        // Save the load node for later. Continue the scan.
> > -        AliasLoadNodes.push_back(Ldn);
> > -        NextInChain = Ldn->getChain().getNode();
> > -        continue;
> > -      } else {
> > -        Index = nullptr;
> > -        break;
> > -      }
> > -    }
> > -  }
> >  }
> >
> >  // We need to check that merging these stores does not cause a loop
> > @@ -12047,8 +12044,7 @@ bool DAGCombiner::checkMergeStoreCandida
> >    return true;
> >  }
> >
> > -bool DAGCombiner::MergeConsecutiveStores(
> > -    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
> > +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
> >    if (OptLevel == CodeGenOpt::None)
> >      return false;
> >
> > @@ -12082,145 +12078,136 @@ bool DAGCombiner::MergeConsecutiveStores
> >    if (MemVT.isVector() && IsLoadSrc)
> >      return false;
> >
> > -  // Only look at ends of store sequences.
> > -  SDValue Chain = SDValue(St, 0);
> > -  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() ==
> ISD::STORE)
> > -    return false;
> > -
> > -  // Save the LoadSDNodes that we find in the chain.
> > -  // We need to make sure that these nodes do not interfere with
> > -  // any of the store nodes.
> > -  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
> > -
> > -  getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
> > +  SmallVector<MemOpLink, 8> StoreNodes;
> > +  // Find potential store merge candidates by searching through chain
> sub-DAG
> > +  getStoreMergeCandidates(St, StoreNodes);
> >
> >    // Check if there is anything to merge.
> >    if (StoreNodes.size() < 2)
> >      return false;
> >
> > -  // only do dependence check in AA case
> > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> > -                                                  :
> DAG.getSubtarget().useAA();
> > -  if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
> > +  // Check that we can merge these candidates without causing a cycle
> > +  if (!checkMergeStoreCandidatesForDependencies(StoreNodes))
> >      return false;
> >
> >    // Sort the memory operands according to their distance from the
> > -  // base pointer.  As a secondary criteria: make sure stores coming
> > -  // later in the code come first in the list. This is important for
> > -  // the non-UseAA case, because we're merging stores into the FINAL
> > -  // store along a chain which potentially contains aliasing stores.
> > -  // Thus, if there are multiple stores to the same address, the last
> > -  // one can be considered for merging but not the others.
> > +  // base pointer.
> >    std::sort(StoreNodes.begin(), StoreNodes.end(),
> >              [](MemOpLink LHS, MemOpLink RHS) {
> > -    return LHS.OffsetFromBase < RHS.OffsetFromBase ||
> > -           (LHS.OffsetFromBase == RHS.OffsetFromBase &&
> > -            LHS.SequenceNum < RHS.SequenceNum);
> > -  });
> > +              return LHS.OffsetFromBase < RHS.OffsetFromBase;
> > +            });
> >
> >    // Scan the memory operations on the chain and find the first
> non-consecutive
> >    // store memory address.
> > -  unsigned LastConsecutiveStore = 0;
> > +  unsigned NumConsecutiveStores = 0;
> >    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
> > -  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
> > -
> > -    // Check that the addresses are consecutive starting from the second
> > -    // element in the list of stores.
> > -    if (i > 0) {
> > -      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
> > -      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
> > -        break;
> > -    }
> >
> > -    // Check if this store interferes with any of the loads that we
> found.
> > -    // If we find a load that alias with this store. Stop the sequence.
> > -    if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
> > -          return isAlias(Ldn, StoreNodes[i].MemNode);
> > -        }))
> > +  // Check that the addresses are consecutive starting from the second
> > +  // element in the list of stores.
> > +  for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
> > +    int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
> > +    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
> >        break;
> > -
> > -    // Mark this node as useful.
> > -    LastConsecutiveStore = i;
> > +    NumConsecutiveStores = i + 1;
> >    }
> >
> > +  if (NumConsecutiveStores < 2)
> > +    return false;
> > +
> >    // The node with the lowest store address.
> > -  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> > -  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
> > -  unsigned FirstStoreAlign = FirstInChain->getAlignment();
> >    LLVMContext &Context = *DAG.getContext();
> >    const DataLayout &DL = DAG.getDataLayout();
> >
> >    // Store the constants into memory as one consecutive store.
> >    if (IsConstantSrc) {
> > -    unsigned LastLegalType = 0;
> > -    unsigned LastLegalVectorType = 0;
> > -    bool NonZero = false;
> > -    for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
> > -      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
> > -      SDValue StoredVal = St->getValue();
> > -
> > -      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
> > -        NonZero |= !C->isNullValue();
> > -      } else if (ConstantFPSDNode *C =
> dyn_cast<ConstantFPSDNode>(StoredVal)) {
> > -        NonZero |= !C->getConstantFPValue()->isNullValue();
> > -      } else {
> > -        // Non-constant.
> > -        break;
> > -      }
> > +    bool RV = false;
> > +    while (NumConsecutiveStores > 1) {
> > +      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> > +      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
> > +      unsigned FirstStoreAlign = FirstInChain->getAlignment();
> > +      unsigned LastLegalType = 0;
> > +      unsigned LastLegalVectorType = 0;
> > +      bool NonZero = false;
> > +      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
> > +        StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
> > +        SDValue StoredVal = ST->getValue();
> > +
> > +        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
> > +          NonZero |= !C->isNullValue();
> > +        } else if (ConstantFPSDNode *C =
> > +                       dyn_cast<ConstantFPSDNode>(StoredVal)) {
> > +          NonZero |= !C->getConstantFPValue()->isNullValue();
> > +        } else {
> > +          // Non-constant.
> > +          break;
> > +        }
> >
> > -      // Find a legal type for the constant store.
> > -      unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
> > -      EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
> > -      bool IsFast;
> > -      if (TLI.isTypeLegal(StoreTy) &&
> > -          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
> > -                                 FirstStoreAlign, &IsFast) && IsFast) {
> > -        LastLegalType = i+1;
> > -      // Or check whether a truncstore is legal.
> > -      } else if (TLI.getTypeAction(Context, StoreTy) ==
> > -                 TargetLowering::TypePromoteInteger) {
> > -        EVT LegalizedStoredValueTy =
> > -          TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
> > -        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
> > -            TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
> > -                                   FirstStoreAS, FirstStoreAlign,
> &IsFast) &&
> > +        // Find a legal type for the constant store.
> > +        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
> > +        EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
> > +        bool IsFast = false;
> > +        if (TLI.isTypeLegal(StoreTy) &&
> > +            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
> > +                                   FirstStoreAlign, &IsFast) &&
> >              IsFast) {
> >            LastLegalType = i + 1;
> > +          // Or check whether a truncstore is legal.
> > +        } else if (TLI.getTypeAction(Context, StoreTy) ==
> > +                   TargetLowering::TypePromoteInteger) {
> > +          EVT LegalizedStoredValueTy =
> > +              TLI.getTypeToTransformTo(Context,
> StoredVal.getValueType());
> > +          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
> > +              TLI.allowsMemoryAccess(Context, DL,
> LegalizedStoredValueTy,
> > +                                     FirstStoreAS, FirstStoreAlign,
> &IsFast) &&
> > +              IsFast) {
> > +            LastLegalType = i + 1;
> > +          }
> >          }
> > -      }
> >
> > -      // We only use vectors if the constant is known to be zero or the
> target
> > -      // allows it and the function is not marked with the
> noimplicitfloat
> > -      // attribute.
> > -      if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
> > -                                                        FirstStoreAS))
> &&
> > -          !NoVectors) {
> > -        // Find a legal type for the vector store.
> > -        EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
> > -        if (TLI.isTypeLegal(Ty) &&
> > -            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
> > -                                   FirstStoreAlign, &IsFast) && IsFast)
> > -          LastLegalVectorType = i + 1;
> > +        // We only use vectors if the constant is known to be zero or
> the target
> > +        // allows it and the function is not marked with the
> noimplicitfloat
> > +        // attribute.
> > +        if ((!NonZero ||
> > +             TLI.storeOfVectorConstantIsCheap(MemVT, i + 1,
> FirstStoreAS)) &&
> > +            !NoVectors) {
> > +          // Find a legal type for the vector store.
> > +          EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
> > +          if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) &&
> > +              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
> > +                                     FirstStoreAlign, &IsFast) &&
> > +              IsFast)
> > +            LastLegalVectorType = i + 1;
> > +        }
> >        }
> > -    }
> >
> > -    // Check if we found a legal integer type to store.
> > -    if (LastLegalType == 0 && LastLegalVectorType == 0)
> > -      return false;
> > +      // Check if we found a legal integer type that creates a
> meaningful merge.
> > +      if (LastLegalType < 2 && LastLegalVectorType < 2)
> > +        break;
> >
> > -    bool UseVector = (LastLegalVectorType > LastLegalType) &&
> !NoVectors;
> > -    unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
> > +      bool UseVector = (LastLegalVectorType > LastLegalType) &&
> !NoVectors;
> > +      unsigned NumElem = (UseVector) ? LastLegalVectorType :
> LastLegalType;
> >
> > -    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
> > -                                           true, UseVector);
> > +      bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT,
> NumElem,
> > +                                                    true, UseVector);
> > +      if (!Merged)
> > +        break;
> > +      // Remove merged stores for next iteration.
> > +      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() +
> NumElem);
> > +      RV = true;
> > +      NumConsecutiveStores -= NumElem;
> > +    }
> > +    return RV;
> >    }
> >
> >    // When extracting multiple vector elements, try to store them
> >    // in one vector store rather than a sequence of scalar stores.
> >    if (IsExtractVecSrc) {
> > +    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> > +    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
> > +    unsigned FirstStoreAlign = FirstInChain->getAlignment();
> >      unsigned NumStoresToMerge = 0;
> >      bool IsVec = MemVT.isVector();
> > -    for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
> > +    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
> >        StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
> >        unsigned StoreValOpcode = St->getValue().getOpcode();
> >        // This restriction could be loosened.
> > @@ -12260,7 +12247,7 @@ bool DAGCombiner::MergeConsecutiveStores
> >    // Find acceptable loads. Loads need to have the same chain (token
> factor),
> >    // must not be zext, volatile, indexed, and they must be consecutive.
> >    BaseIndexOffset LdBasePtr;
> > -  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
> > +  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
> >      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
> >      LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
> >      if (!Ld) break;
> > @@ -12293,7 +12280,7 @@ bool DAGCombiner::MergeConsecutiveStores
> >      }
> >
> >      // We found a potential memory operand to merge.
> > -    LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
> > +    LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
> >    }
> >
> >    if (LoadNodes.size() < 2)
> > @@ -12305,7 +12292,9 @@ bool DAGCombiner::MergeConsecutiveStores
> >    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT,
> RequiredAlignment) &&
> >        St->getAlignment() >= RequiredAlignment)
> >      return false;
> > -
> > +  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> > +  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
> > +  unsigned FirstStoreAlign = FirstInChain->getAlignment();
> >    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
> >    unsigned FirstLoadAS = FirstLoad->getAddressSpace();
> >    unsigned FirstLoadAlign = FirstLoad->getAlignment();
> > @@ -12374,30 +12363,19 @@ bool DAGCombiner::MergeConsecutiveStores
> >
> >    // We add +1 here because the LastXXX variables refer to location
> while
> >    // the NumElem refers to array/index size.
> > -  unsigned NumElem = std::min(LastConsecutiveStore,
> LastConsecutiveLoad) + 1;
> > +  unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad
> + 1);
> >    NumElem = std::min(LastLegalType, NumElem);
> >
> >    if (NumElem < 2)
> >      return false;
> >
> > -  // Collect the chains from all merged stores.
> > +  // Collect the chains from all merged stores. Because the common case
> > +  // all chains are the same, check if we match the first Chain.
> >    SmallVector<SDValue, 8> MergeStoreChains;
> >    MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
> > -
> > -  // The latest Node in the DAG.
> > -  unsigned LatestNodeUsed = 0;
> > -  for (unsigned i=1; i<NumElem; ++i) {
> > -    // Find a chain for the new wide-store operand. Notice that some
> > -    // of the store nodes that we found may not be selected for
> inclusion
> > -    // in the wide store. The chain we use needs to be the chain of the
> > -    // latest store node which is *used* and replaced by the wide store.
> > -    if (StoreNodes[i].SequenceNum <
> StoreNodes[LatestNodeUsed].SequenceNum)
> > -      LatestNodeUsed = i;
> > -
> > -    MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
> > -  }
> > -
> > -  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
> > +  for (unsigned i = 1; i < NumElem; ++i)
> > +    if (StoreNodes[0].MemNode->getChain() !=
> StoreNodes[i].MemNode->getChain())
> > +      MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
> >
> >    // Find if it is better to use vectors or integers to load and store
> >    // to memory.
> > @@ -12421,6 +12399,8 @@ bool DAGCombiner::MergeConsecutiveStores
> >    SDValue NewStoreChain =
> >      DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other,
> MergeStoreChains);
> >
> > +  AddToWorklist(NewStoreChain.getNode());
> > +
> >    SDValue NewStore =
> >        DAG.getStore(NewStoreChain, StoreDL, NewLoad,
> FirstInChain->getBasePtr(),
> >                     FirstInChain->getPointerInfo(), FirstStoreAlign);
> > @@ -12432,25 +12412,9 @@ bool DAGCombiner::MergeConsecutiveStores
> >                                    SDValue(NewLoad.getNode(), 1));
> >    }
> >
> > -  if (UseAA) {
> > -    // Replace the all stores with the new store.
> > -    for (unsigned i = 0; i < NumElem; ++i)
> > -      CombineTo(StoreNodes[i].MemNode, NewStore);
> > -  } else {
> > -    // Replace the last store with the new store.
> > -    CombineTo(LatestOp, NewStore);
> > -    // Erase all other stores.
> > -    for (unsigned i = 0; i < NumElem; ++i) {
> > -      // Remove all Store nodes.
> > -      if (StoreNodes[i].MemNode == LatestOp)
> > -        continue;
> > -      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> > -      DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
> > -      deleteAndRecombine(St);
> > -    }
> > -  }
> > -
> > -  StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
> > +  // Replace the all stores with the new store.
> > +  for (unsigned i = 0; i < NumElem; ++i)
> > +    CombineTo(StoreNodes[i].MemNode, NewStore);
> >    return true;
> >  }
> >
> > @@ -12607,19 +12571,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *
> >    if (SDValue NewST = TransformFPLoadStorePair(N))
> >      return NewST;
> >
> > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> > -                                                  :
> DAG.getSubtarget().useAA();
> > -#ifndef NDEBUG
> > -  if (CombinerAAOnlyFunc.getNumOccurrences() &&
> > -      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
> > -    UseAA = false;
> > -#endif
> > -  if (UseAA && ST->isUnindexed()) {
> > -    // FIXME: We should do this even without AA enabled. AA will just
> allow
> > -    // FindBetterChain to work in more situations. The problem with
> this is that
> > -    // any combine that expects memory operations to be on consecutive
> chains
> > -    // first needs to be updated to look for users of the same chain.
> > -
> > +  if (ST->isUnindexed()) {
> >      // Walk up chain skipping non-aliasing memory nodes, on this store
> and any
> >      // adjacent stores.
> >      if (findBetterNeighborChains(ST)) {
> > @@ -12653,8 +12605,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *
> >      if (SimplifyDemandedBits(
> >              Value,
> >              APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
> > -
>  ST->getMemoryVT().getScalarSizeInBits())))
> > +
>  ST->getMemoryVT().getScalarSizeInBits()))) {
> > +      // Re-visit the store if anything changed; SimplifyDemandedBits
> > +      // will add Value's node back to the worklist if necessary, but
> > +      // we also need to re-visit the Store node itself.
> > +      AddToWorklist(N);
> >        return SDValue(N, 0);
> > +    }
> >    }
> >
> >    // If this is a load followed by a store to the same location, then
> the store
> > @@ -12698,15 +12655,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *
> >        // There can be multiple store sequences on the same chain.
> >        // Keep trying to merge store sequences until we are unable to do
> so
> >        // or until we merge the last store on the chain.
> > -      SmallVector<MemOpLink, 8> StoreNodes;
> > -      bool Changed = MergeConsecutiveStores(ST, StoreNodes);
> > +      bool Changed = MergeConsecutiveStores(ST);
> >        if (!Changed) break;
> > -
> > -      if (any_of(StoreNodes,
> > -                 [ST](const MemOpLink &Link) { return Link.MemNode ==
> ST; })) {
> > -        // ST has been merged and no longer exists.
> > +      // Return N as merge only uses CombineTo and no worklist clean
> > +      // up is necessary.
> > +      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
> >          return SDValue(N, 0);
> > -      }
> >      }
> >    }
> >
> > @@ -12715,7 +12669,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *
> >    // Make sure to do this only after attempting to merge stores in
> order to
> >    //  avoid changing the types of some subset of stores due to visit
> order,
> >    //  preventing their merging.
> > -  if (isa<ConstantFPSDNode>(Value)) {
> > +  if (isa<ConstantFPSDNode>(ST->getValue())) {
> >      if (SDValue NewSt = replaceStoreOfFPConstant(ST))
> >        return NewSt;
> >    }
> > @@ -13652,6 +13606,35 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
> >    if (ISD::allOperandsUndef(N))
> >      return DAG.getUNDEF(VT);
> >
> > +  // Check if we can express BUILD VECTOR via subvector extract.
> > +  if (!LegalTypes && (N->getNumOperands() > 1)) {
> > +    SDValue Op0 = N->getOperand(0);
> > +    auto checkElem = [&](SDValue Op) -> uint64_t {
> > +      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
> > +          (Op0.getOperand(0) == Op.getOperand(0)))
> > +        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
> > +          return CNode->getZExtValue();
> > +      return -1;
> > +    };
> > +
> > +    int Offset = checkElem(Op0);
> > +    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
> > +      if (Offset + i != checkElem(N->getOperand(i))) {
> > +        Offset = -1;
> > +        break;
> > +      }
> > +    }
> > +
> > +    if ((Offset == 0) &&
> > +        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
> > +      return Op0.getOperand(0);
> > +    if ((Offset != -1) &&
> > +        ((Offset % N->getValueType(0).getVectorNumElements()) ==
> > +         0)) // IDX must be multiple of output size.
> > +      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
> N->getValueType(0),
> > +                         Op0.getOperand(0), Op0.getOperand(1));
> > +  }
> > +
> >    if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
> >      return V;
> >
> > @@ -15748,7 +15731,7 @@ static bool FindBaseOffset(SDValue Ptr,
> >    if (Base.getOpcode() == ISD::ADD) {
> >      if (ConstantSDNode *C =
> dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
> >        Base = Base.getOperand(0);
> > -      Offset += C->getZExtValue();
> > +      Offset += C->getSExtValue();
> >      }
> >    }
> >
> > @@ -15945,6 +15928,12 @@ void DAGCombiner::GatherAllAliases(SDNod
> >        ++Depth;
> >        break;
> >
> > +    case ISD::CopyFromReg:
> > +      // Forward past CopyFromReg.
> > +      Chains.push_back(Chain.getOperand(0));
> > +      ++Depth;
> > +      break;
> > +
> >      default:
> >        // For all other instructions we will just have to take what we
> can get.
> >        Aliases.push_back(Chain);
> > @@ -15973,6 +15962,18 @@ SDValue DAGCombiner::FindBetterChain(SDN
> >    return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
> >  }
> >
> > +// This function tries to collect a bunch of potentially interesting
> > +// nodes to improve the chains of, all at once. This might seem
> > +// redundant, as this function gets called when visiting every store
> > +// node, so why not let the work be done on each store as it's visited?
> > +//
> > +// I believe this is mainly important because MergeConsecutiveStores
> > +// is unable to deal with merging stores of different sizes, so unless
> > +// we improve the chains of all the potential candidates up-front
> > +// before running MergeConsecutiveStores, it might only see some of
> > +// the nodes that will eventually be candidates, and then not be able
> > +// to go from a partially-merged state to the desired final
> > +// fully-merged state.
> >  bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
> >    // This holds the base pointer, index, and the offset in bytes from
> the base
> >    // pointer.
> > @@ -16008,10 +16009,8 @@ bool DAGCombiner::findBetterNeighborChai
> >      if (!Ptr.equalBaseIndex(BasePtr))
> >        break;
> >
> > -    // Find the next memory operand in the chain. If the next operand
> in the
> > -    // chain is a store then move up and continue the scan with the next
> > -    // memory operand. If the next operand is a load save it and use
> alias
> > -    // information to check if it interferes with anything.
> > +    // Walk up the chain to find the next store node, ignoring any
> > +    // intermediate loads. Any other kind of node will halt the loop.
> >      SDNode *NextInChain = Index->getChain().getNode();
> >      while (true) {
> >        if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
> > @@ -16030,9 +16029,14 @@ bool DAGCombiner::findBetterNeighborChai
> >          Index = nullptr;
> >          break;
> >        }
> > -    }
> > +    } // end while
> >    }
> >
> > +  // At this point, ChainedStores lists all of the Store nodes
> > +  // reachable by iterating up through chain nodes matching the above
> > +  // conditions.  For each such store identified, try to find an
> > +  // earlier chain to attach the store to which won't violate the
> > +  // required ordering.
> >    bool MadeChangeToSt = false;
> >    SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
> >
> >
> > Modified: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp?rev=296476&r1=296475&r2=296476&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp (original)
> > +++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp Tue Feb 28 08:24:15
> 2017
> > @@ -850,7 +850,7 @@ TargetLoweringBase::TargetLoweringBase(c
> >    MinFunctionAlignment = 0;
> >    PrefFunctionAlignment = 0;
> >    PrefLoopAlignment = 0;
> > -  GatherAllAliasesMaxDepth = 6;
> > +  GatherAllAliasesMaxDepth = 18;
> >    MinStackArgumentAlignment = 1;
> >    // TODO: the default will be switched to 0 in the next commit, along
> >    // with the Target-specific changes necessary.
> >
> > Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=296476&r1=296475&r2=296476&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> > +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Feb 28
> 08:24:15 2017
> > @@ -9254,7 +9254,7 @@ static SDValue performSTORECombine(SDNod
> >    return SDValue();
> >  }
> >
> > -  /// This function handles the log2-shuffle pattern produced by the
> > +/// This function handles the log2-shuffle pattern produced by the
> >  /// LoopVectorizer for the across vector reduction. It consists of
> >  /// log2(NumVectorElements) steps and, in each step, 2^(s) elements
> >  /// are reduced, where s is an induction variable from 0 to
> >
> > Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=296476&r1=296475&r2=296476&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
> > +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Tue Feb 28 08:24:15 2017
> > @@ -500,6 +500,11 @@ class InstrItineraryData;
> >      bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
> >                                     unsigned &Cost) const override;
> >
> > +    bool canMergeStoresTo(EVT MemVT) const override {
> > +      // Do not merge to larger than i32.
> > +      return (MemVT.getSizeInBits() <= 32);
> > +    }
> > +
> >      bool isCheapToSpeculateCttz() const override;
> >      bool isCheapToSpeculateCtlz() const override;
> >
> >
> > Modified: llvm/trunk/test/CodeGen/AArch64/argument-blocks.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/argument-blocks.ll?rev=296476&r1=296475&r2=296476&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/AArch64/argument-blocks.ll (original)
> > +++ llvm/trunk/test/CodeGen/AArch64/argument-blocks.ll Tue Feb 28
> 08:24:15 2017
> > @@ -59,10 +59,10 @@ define i64 @test_hfa_ignores_gprs([7 x f
> >  }
> >
> >  ; [2 x float] should not be promoted to double by the Darwin varargs
> handling,
> > -; but should go in an 8-byte aligned slot.
> > +; but should go in an 8-byte aligned slot and can be merged as integer
> stores.
> >  define void @test_varargs_stackalign() {
> >  ; CHECK-LABEL: test_varargs_stackalign:
> > -; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
> > +; CHECK-DARWINPCS: str {{x[0-9]+}}, [sp, #16]
> >
> >    call void(...) @callee([3 x float] undef, [2 x float] [float 1.0,
> float 2.0])
> >    ret void
> >
> > Modified: llvm/trunk/test/CodeGen/AArch64/arm64-abi.ll
> > URL:
> <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-abi.ll?rev=296476&r1=296475&r2=296476&view=diff>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170303/6c27750e/attachment.html>


More information about the llvm-commits mailing list