[llvm] r296476 - In visitSTORE, always use FindBetterChain, rather than only when UseAA is enabled.

Chandler Carruth via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 3 02:15:26 PST 2017


Reverted in r296862. I'll follow up both with test cases and some code
review comments to point at likely suspects.

On Thu, Mar 2, 2017 at 11:58 PM Chandler Carruth <chandlerc at gmail.com>
wrote:

> The compile time regression is still present after that.
>
>
> On Thu, Mar 2, 2017 at 11:56 PM Mikael Holmén via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>
>
> On 03/03/2017 10:28 AM, Chandler Carruth wrote:
> > I also now have an over 20x compile time regression that this triggers.
> > Sadly my test case is still huge, but we can reduce it. Especially given
> > the crasher, i'm going to revert for now.
>
> As far as I know, the crash has been fixed already in r296668, see:
>
> http://bugs.llvm.org/show_bug.cgi?id=32108
>
> The regressions you're seeing might be enough for revert anyway, I don't
> know, but at least the crash seems to be fixed.
>
> Regards,
> Mikael
>
> >
> > On Wed, Mar 1, 2017 at 12:48 AM Mikael Holmén via llvm-commits
> > <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>>
> wrote:
> >
> >     Hi Nirav,
> >
> >     The attached (llvm-stress generated and then bugpoint reduced)
> ll-file
> >     crashes llc with this commit:
> >
> >     llc -march=x86-64 stress_reduced.ll
> >
> >     gives
> >
> >     llc: ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp:746: bool
> >     llvm::SelectionDAG::RemoveNodeFromCSEMaps(llvm::SDNode *): Assertion
> >     `N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"'
> >     failed.
> >     #0 0x0b161bcc llvm::sys::PrintStackTrace(llvm::raw_ostream&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/Support/Unix/Signals.inc:402:5
> >     #1 0x0b161dfe PrintStackTraceSignalHandler(void*)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/Support/Unix/Signals.inc:466:1
> >     #2 0x0b15fdff llvm::sys::RunSignalHandlers()
> >     /data/repo/llvm-patch/build-all-Debug/../lib/Support/Signals.cpp:45:5
> >     #3 0x0b1625b1 SignalHandler(int)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/Support/Unix/Signals.inc:256:1
> >     #4 0xf77c6410  0xffffe410
> llvm::SelectionDAG::DeleteNode(llvm::SDNode*)
> >     #5 0xf77c6410
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAG.cpp:615:26
> >
> >     #6 0xf77c6410 (anonymous
> >     namespace)::DAGCombiner::recursivelyDeleteUnusedNodes(llvm::SDNode*)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1260:5
> >     #7 0x0aecc079 (anonymous
> >     namespace)::DAGCombiner::Run(llvm::CombineLevel)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1302:9
> >     #8 0x0ad0cb1f llvm::SelectionDAG::Combine(llvm::CombineLevel,
> >     llvm::AAResults&, llvm::CodeGenOpt::Level)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/DAGCombiner.cpp:16066:3
> >     #9 0x0ad0c044 llvm::SelectionDAGISel::CodeGenAndEmitDAG()
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:871:7
> >     #10 0x0ad0bcd6
> >
>  llvm::SelectionDAGISel::SelectBasicBlock(llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::Instruction,
> >     true, false, void>, false, true>,
> >
>  llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::Instruction,
> >     true, false, void>, false, true>, bool&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:726:1
> >     #11 0x0af36f98
> >     llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1689:5
> >     #12 0x0af357a7
> >     llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:543:7
> >     #13 0x0af3558a (anonymous
> >
>  namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/Target/X86/X86ISelDAGToDAG.cpp:176:25
> >     #14 0x0af32754
> llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/CodeGen/MachineFunctionPass.cpp:62:8
> >     #15 0x09936a8d llvm::FPPassManager::runOnFunction(llvm::Function&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1513:23
> >     #16 0x0a2c0401 llvm::FPPassManager::runOnModule(llvm::Module&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1534:16
> >     #17 0x0a7f6c23 (anonymous
> >     namespace)::MPPassManager::runOnModule(llvm::Module&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1590:23
> >     #18 0x0a7f6fca llvm::legacy::PassManagerImpl::run(llvm::Module&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1693:16
> >     #19 0x0a7f788c llvm::legacy::PassManager::run(llvm::Module&)
> >
>  /data/repo/llvm-patch/build-all-Debug/../lib/IR/LegacyPassManager.cpp:1724:3
> >     #20 0x0a7f72dc compileModule(char**, llvm::LLVMContext&)
> >     /data/repo/llvm-patch/build-all-Debug/../tools/llc/llc.cpp:579:42
> >     #21 0x0a7f7ee6 main
> >     /data/repo/llvm-patch/build-all-Debug/../tools/llc/llc.cpp:331:13
> >     #22 0x0860a90b __libc_start_main
> >     /build/eglibc-X4bnBz/eglibc-2.19/csu/libc-start.c:321:0
> >
>  build-all-Debug/bin/llc(_ZN4llvm3sys15PrintStackTraceERNS_11raw_ostreamE+0x5c)[0xb161bcc]
> >     build-all-Debug/bin/llc[0xb161dfe]
> >
>  build-all-Debug/bin/llc(_ZN4llvm3sys17RunSignalHandlersEv+0xbf)[0xb15fdff]
> >     build-all-Debug/bin/llc[0xb1625b1]
> >     [0xf77c6410]
> >
>  build-all-Debug/bin/llc(_ZN4llvm12SelectionDAG10DeleteNodeEPNS_6SDNodeE+0x39)[0xaecc079]
> >     build-all-Debug/bin/llc[0xad0cb1f]
> >     build-all-Debug/bin/llc[0xad0c044]
> >
>  build-all-Debug/bin/llc(_ZN4llvm12SelectionDAG7CombineENS_12CombineLevelERNS_9AAResultsENS_10CodeGenOpt5LevelE+0x76)[0xad0bcd6]
> >
>  build-all-Debug/bin/llc(_ZN4llvm16SelectionDAGISel17CodeGenAndEmitDAGEv+0x17e8)[0xaf36f98]
> >
>  build-all-Debug/bin/llc(_ZN4llvm16SelectionDAGISel16SelectBasicBlockENS_14ilist_iteratorINS_12ilist_detail12node_optionsINS_11InstructionELb1ELb0EvEELb0ELb1EEES6_Rb+0x137)[0xaf357a7]
> >
>  build-all-Debug/bin/llc(_ZN4llvm16SelectionDAGISel20SelectAllBasicBlocksERKNS_8FunctionE+0x144a)[0xaf3558a]
> >
>  build-all-Debug/bin/llc(_ZN4llvm16SelectionDAGISel20runOnMachineFunctionERNS_15MachineFunctionE+0x734)[0xaf32754]
> >     build-all-Debug/bin/llc[0x9936a8d]
> >
>  build-all-Debug/bin/llc(_ZN4llvm19MachineFunctionPass13runOnFunctionERNS_8FunctionE+0x251)[0xa2c0401]
> >
>  build-all-Debug/bin/llc(_ZN4llvm13FPPassManager13runOnFunctionERNS_8FunctionE+0x1b3)[0xa7f6c23]
> >
>  build-all-Debug/bin/llc(_ZN4llvm13FPPassManager11runOnModuleERNS_6ModuleE+0xaa)[0xa7f6fca]
> >     build-all-Debug/bin/llc[0xa7f788c]
> >
>  build-all-Debug/bin/llc(_ZN4llvm6legacy15PassManagerImpl3runERNS_6ModuleE+0x14c)[0xa7f72dc]
> >
>  build-all-Debug/bin/llc(_ZN4llvm6legacy11PassManager3runERNS_6ModuleE+0x36)[0xa7f7ee6]
> >     build-all-Debug/bin/llc[0x860a90b]
> >     build-all-Debug/bin/llc(main+0x5b0)[0x86084e0]
> >     /lib/i386-linux-gnu/libc.so.6(__libc_start_main+0xf3)[0xf7493af3]
> >     Stack dump:
> >     0.      Program arguments: build-all-Debug/bin/llc -march=x86-64
> >     stress_reduced.ll
> >     1.      Running pass 'Function Pass Manager' on module
> >     'stress_reduced.ll'.
> >     2.      Running pass 'X86 DAG->DAG Instruction Selection' on function
> >     '@autogen_SD1794'
> >     Abort
> >
> >     Regards,
> >     Mikael
> >
> >
> >     On 02/28/2017 03:24 PM, Nirav Dave via llvm-commits wrote:
> >     > Author: niravd
> >     > Date: Tue Feb 28 08:24:15 2017
> >     > New Revision: 296476
> >     >
> >     > URL: http://llvm.org/viewvc/llvm-project?rev=296476&view=rev
> >     > Log:
> >     > In visitSTORE, always use FindBetterChain, rather than only when
> >     UseAA is enabled.
> >     >
> >     >     Recommiting after fixup of 32-bit aliasing sign offset bug in
> >     DAGCombiner.
> >     >
> >     >     * Simplify Consecutive Merge Store Candidate Search
> >     >
> >     >     Now that address aliasing is much less conservative, push
> through
> >     >     simplified store merging search and chain alias analysis which
> >     only
> >     >     checks for parallel stores through the chain subgraph. This is
> >     cleaner
> >     >     as the separation of non-interfering loads/stores from the
> >     >     store-merging logic.
> >     >
> >     >     When merging stores search up the chain through a single load,
> and
> >     >     finds all possible stores by looking down from through a load
> >     and a
> >     >     TokenFactor to all stores visited.
> >     >
> >     >     This improves the quality of the output SelectionDAG and the
> >     output
> >     >     Codegen (save perhaps for some ARM cases where we correctly
> >     constructs
> >     >     wider loads, but then promotes them to float operations which
> >     appear
> >     >     but requires more expensive constant generation).
> >     >
> >     >     Some minor peephole optimizations to deal with improved SubDAG
> >     shapes (listed below)
> >     >
> >     >     Additional Minor Changes:
> >     >
> >     >       1. Finishes removing unused AliasLoad code
> >     >
> >     >       2. Unifies the chain aggregation in the merged stores across
> >     code
> >     >          paths
> >     >
> >     >       3. Re-add the Store node to the worklist after calling
> >     >          SimplifyDemandedBits.
> >     >
> >     >       4. Increase GatherAllAliasesMaxDepth from 6 to 18. That
> >     number is
> >     >          arbitrary, but seems sufficient to not cause regressions
> in
> >     >          tests.
> >     >
> >     >       5. Remove Chain dependencies of Memory operations on
> CopyfromReg
> >     >          nodes as these are captured by data dependence
> >     >
> >     >       6. Forward loads-store values through tokenfactors containing
> >     >           {CopyToReg,CopyFromReg} Values.
> >     >
> >     >       7. Peephole to convert buildvector of extract_vector_elt to
> >     >          extract_subvector if possible (see
> >     >          CodeGen/AArch64/store-merge.ll)
> >     >
> >     >       8. Store merging for the ARM target is restricted to 32-bit
> as
> >     >          some in some contexts invalid 64-bit operations are being
> >     >          generated. This can be removed once appropriate checks are
> >     >          added.
> >     >
> >     >     This finishes the change Matt Arsenault started in r246307 and
> >     >     jyknight's original patch.
> >     >
> >     >     Many tests required some changes as memory operations are now
> >     >     reorderable, improving load-store forwarding. One test in
> >     >     particular is worth noting:
> >     >
> >     >       CodeGen/PowerPC/ppc64-align-long-double.ll - Improved
> load-store
> >     >       forwarding converts a load-store pair into a parallel store
> and
> >     >       a memory-realized bitcast of the same value. However,
> because we
> >     >       lose the sharing of the explicit and implicit store values we
> >     >       must create another local store. A similar transformation
> >     >       happens before SelectionDAG as well.
> >     >
> >     >     Reviewers: arsenm, hfinkel, tstellarAMD, jyknight, nhaehnle
> >     >
> >     > Removed:
> >     >     llvm/trunk/test/CodeGen/X86/combiner-aa-0.ll
> >     >     llvm/trunk/test/CodeGen/X86/combiner-aa-1.ll
> >     >     llvm/trunk/test/CodeGen/X86/pr18023.ll
> >     > Modified:
> >     >     llvm/trunk/include/llvm/Target/TargetLowering.h
> >     >     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> >     >     llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
> >     >     llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> >     >     llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> >     >     llvm/trunk/test/CodeGen/AArch64/argument-blocks.ll
> >     >     llvm/trunk/test/CodeGen/AArch64/arm64-abi.ll
> >     >     llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
> >     >     llvm/trunk/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
> >     >     llvm/trunk/test/CodeGen/AArch64/merge-store.ll
> >     >     llvm/trunk/test/CodeGen/AArch64/vector_merge_dep_check.ll
> >     >     llvm/trunk/test/CodeGen/AMDGPU/debugger-insert-nops.ll
> >     >     llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
> >     >     llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll
> >     >     llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll
> >     >     llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
> >     >     llvm/trunk/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
> >     >     llvm/trunk/test/CodeGen/ARM/alloc-no-stack-realign.ll
> >     >     llvm/trunk/test/CodeGen/ARM/gpr-paired-spill.ll
> >     >     llvm/trunk/test/CodeGen/ARM/ifcvt10.ll
> >     >     llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
> >     >     llvm/trunk/test/CodeGen/ARM/static-addr-hoisting.ll
> >     >     llvm/trunk/test/CodeGen/BPF/undef.ll
> >     >     llvm/trunk/test/CodeGen/MSP430/Inst16mm.ll
> >     >     llvm/trunk/test/CodeGen/Mips/cconv/arguments-float.ll
> >     >     llvm/trunk/test/CodeGen/Mips/cconv/arguments-varargs.ll
> >     >     llvm/trunk/test/CodeGen/Mips/fastcc.ll
> >     >     llvm/trunk/test/CodeGen/Mips/load-store-left-right.ll
> >     >     llvm/trunk/test/CodeGen/Mips/micromips-li.ll
> >     >     llvm/trunk/test/CodeGen/Mips/mips64-f128-call.ll
> >     >     llvm/trunk/test/CodeGen/Mips/mips64-f128.ll
> >     >     llvm/trunk/test/CodeGen/Mips/mno-ldc1-sdc1.ll
> >     >     llvm/trunk/test/CodeGen/Mips/msa/f16-llvm-ir.ll
> >     >     llvm/trunk/test/CodeGen/Mips/msa/i5_ld_st.ll
> >     >     llvm/trunk/test/CodeGen/Mips/o32_cc_byval.ll
> >     >     llvm/trunk/test/CodeGen/Mips/o32_cc_vararg.ll
> >     >     llvm/trunk/test/CodeGen/PowerPC/anon_aggr.ll
> >     >     llvm/trunk/test/CodeGen/PowerPC/complex-return.ll
> >     >     llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll
> >     >     llvm/trunk/test/CodeGen/PowerPC/ppc64-align-long-double.ll
> >     >     llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll
> >     >     llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll
> >     >     llvm/trunk/test/CodeGen/SystemZ/unaligned-01.ll
> >     >     llvm/trunk/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
> >     >     llvm/trunk/test/CodeGen/Thumb/stack-access.ll
> >     >     llvm/trunk/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
> >     >     llvm/trunk/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
> >     >     llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
> >     >     llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
> >     >     llvm/trunk/test/CodeGen/X86/chain_order.ll
> >     >     llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
> >     >     llvm/trunk/test/CodeGen/X86/copy-eflags.ll
> >     >     llvm/trunk/test/CodeGen/X86/dag-merge-fast-accesses.ll
> >     >     llvm/trunk/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
> >     >
> >
> llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
> >     >     llvm/trunk/test/CodeGen/X86/i256-add.ll
> >     >     llvm/trunk/test/CodeGen/X86/i386-shrink-wrapping.ll
> >     >     llvm/trunk/test/CodeGen/X86/illegal-bitfield-loadstore.ll
> >     >     llvm/trunk/test/CodeGen/X86/live-range-nosubreg.ll
> >     >     llvm/trunk/test/CodeGen/X86/longlong-deadload.ll
> >     >     llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
> >     >     llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll
> >     >
>  llvm/trunk/test/CodeGen/X86/merge-store-partially-alias-loads.ll
> >     >     llvm/trunk/test/CodeGen/X86/split-store.ll
> >     >     llvm/trunk/test/CodeGen/X86/stores-merging.ll
> >     >     llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
> >     >     llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
> >     >     llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
> >     >     llvm/trunk/test/CodeGen/X86/vectorcall.ll
> >     >     llvm/trunk/test/CodeGen/X86/win32-eh.ll
> >     >     llvm/trunk/test/CodeGen/XCore/varargs.ll
> >     >
> >     > Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
> >     > URL:
> >
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=296476&r1=296475&r2=296476&view=diff
> >     >
> >
>  ==============================================================================
> >     > --- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
> >     > +++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue Feb 28
> >     08:24:15 2017
> >     > @@ -363,6 +363,9 @@ public:
> >     >      return false;
> >     >    }
> >     >
> >     > +  /// Returns if it's reasonable to merge stores to MemVT size.
> >     > +  virtual bool canMergeStoresTo(EVT MemVT) const { return true; }
> >     > +
> >     >    /// \brief Return true if it is cheap to speculate a call to
> >     intrinsic cttz.
> >     >    virtual bool isCheapToSpeculateCttz() const {
> >     >      return false;
> >     >
> >     > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> >     > URL:
> >
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=296476&r1=296475&r2=296476&view=diff
> >     >
> >
>  ==============================================================================
> >     > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> >     > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Feb 28
> >     08:24:15 2017
> >     > @@ -53,10 +53,6 @@ STATISTIC(SlicedLoads, "Number of load s
> >     >
> >     >  namespace {
> >     >    static cl::opt<bool>
> >     > -    CombinerAA("combiner-alias-analysis", cl::Hidden,
> >     > -               cl::desc("Enable DAG combiner alias-analysis
> >     heuristics"));
> >     > -
> >     > -  static cl::opt<bool>
> >     >      CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
> >     >                 cl::desc("Enable DAG combiner's use of IR alias
> >     analysis"));
> >     >
> >     > @@ -419,15 +415,12 @@ namespace {
> >     >      /// Holds a pointer to an LSBaseSDNode as well as information
> >     on where it
> >     >      /// is located in a sequence of memory operations connected
> >     by a chain.
> >     >      struct MemOpLink {
> >     > -      MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
> >     > -      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
> >     > +      MemOpLink(LSBaseSDNode *N, int64_t Offset)
> >     > +          : MemNode(N), OffsetFromBase(Offset) {}
> >     >        // Ptr to the mem node.
> >     >        LSBaseSDNode *MemNode;
> >     >        // Offset from the base ptr.
> >     >        int64_t OffsetFromBase;
> >     > -      // What is the sequence number of this mem node.
> >     > -      // Lowest mem operand in the DAG starts at zero.
> >     > -      unsigned SequenceNum;
> >     >      };
> >     >
> >     >      /// This is a helper function for visitMUL to check the
> >     profitability
> >     > @@ -438,12 +431,6 @@ namespace {
> >     >                                       SDValue &AddNode,
> >     >                                       SDValue &ConstNode);
> >     >
> >     > -    /// This is a helper function for
> >     MergeStoresOfConstantsOrVecElts. Returns a
> >     > -    /// constant build_vector of the stored constant values in
> >     Stores.
> >     > -    SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const
> >     SDLoc &SL,
> >     > -                                         ArrayRef<MemOpLink>
> Stores,
> >     > -                                         SmallVectorImpl<SDValue>
> >     &Chains,
> >     > -                                         EVT Ty) const;
> >     >
> >     >      /// This is a helper function for visitAND and
> >     visitZERO_EXTEND.  Returns
> >     >      /// true if the (and (load x) c) pattern matches an extload.
> >     ExtVT returns
> >     > @@ -457,18 +444,15 @@ namespace {
> >     >      /// This is a helper function for MergeConsecutiveStores.
> >     When the source
> >     >      /// elements of the consecutive stores are all constants or
> >     all extracted
> >     >      /// vector elements, try to merge them into one larger store.
> >     > -    /// \return number of stores that were merged into a merged
> >     store (always
> >     > -    /// a prefix of \p StoreNode).
> >     > -    bool MergeStoresOfConstantsOrVecElts(
> >     > -        SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
> >     unsigned NumStores,
> >     > -        bool IsConstantSrc, bool UseVector);
> >     > +    /// \return True if a merged store was created.
> >     > +    bool
> >     MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink>
> &StoreNodes,
> >     > +                                         EVT MemVT, unsigned
> >     NumStores,
> >     > +                                         bool IsConstantSrc, bool
> >     UseVector);
> >     >
> >     >      /// This is a helper function for MergeConsecutiveStores.
> >     >      /// Stores that may be merged are placed in StoreNodes.
> >     > -    /// Loads that may alias with those stores are placed in
> >     AliasLoadNodes.
> >     > -    void getStoreMergeAndAliasCandidates(
> >     > -        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
> >     > -        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
> >     > +    void getStoreMergeCandidates(StoreSDNode *St,
> >     > +                                 SmallVectorImpl<MemOpLink>
> >     &StoreNodes);
> >     >
> >     >      /// Helper function for MergeConsecutiveStores. Checks if
> >     >      /// Candidate stores have indirect dependency through their
> >     > @@ -480,8 +464,7 @@ namespace {
> >     >      /// This optimization uses wide integers or vectors when
> >     possible.
> >     >      /// \return number of stores that were merged into a merged
> >     store (the
> >     >      /// affected nodes are stored as a prefix in \p StoreNodes).
> >     > -    bool MergeConsecutiveStores(StoreSDNode *N,
> >     > -                                SmallVectorImpl<MemOpLink>
> >     &StoreNodes);
> >     > +    bool MergeConsecutiveStores(StoreSDNode *N);
> >     >
> >     >      /// \brief Try to transform a truncation where C is a
> constant:
> >     >      ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
> >     > @@ -1584,7 +1567,7 @@ SDValue DAGCombiner::visitTokenFactor(SD
> >     >    }
> >     >
> >     >    SmallVector<SDNode *, 8> TFs;     // List of token factors to
> >     visit.
> >     > -  SmallVector<SDValue, 8> Ops;    // Ops for replacing token
> factor.
> >     > +  SmallVector<SDValue, 8> Ops;      // Ops for replacing token
> >     factor.
> >     >    SmallPtrSet<SDNode*, 16> SeenOps;
> >     >    bool Changed = false;             // If we should replace this
> >     token factor.
> >     >
> >     > @@ -1628,6 +1611,86 @@ SDValue DAGCombiner::visitTokenFactor(SD
> >     >      }
> >     >    }
> >     >
> >     > +  // Remove Nodes that are chained to another node in the list.
> Do so
> >     > +  // by walking up chains breath-first stopping when we've seen
> >     > +  // another operand. In general we must climb to the EntryNode,
> >     but we can exit
> >     > +  // early if we find all remaining work is associated with just
> >     one operand as
> >     > +  // no further pruning is possible.
> >     > +
> >     > +  // List of nodes to search through and original Ops from which
> >     they originate.
> >     > +  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
> >     > +  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each
> Op.
> >     > +  SmallPtrSet<SDNode *, 16> SeenChains;
> >     > +  bool DidPruneOps = false;
> >     > +
> >     > +  unsigned NumLeftToConsider = 0;
> >     > +  for (const SDValue &Op : Ops) {
> >     > +    Worklist.push_back(std::make_pair(Op.getNode(),
> >     NumLeftToConsider++));
> >     > +    OpWorkCount.push_back(1);
> >     > +  }
> >     > +
> >     > +  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned
> >     OpNumber) {
> >     > +    // If this is an Op, we can remove the op from the list.
> >     Remark any
> >     > +    // search associated with it as from the current OpNumber.
> >     > +    if (SeenOps.count(Op) != 0) {
> >     > +      Changed = true;
> >     > +      DidPruneOps = true;
> >     > +      unsigned OrigOpNumber = 0;
> >     > +      while (Ops[OrigOpNumber].getNode() != Op && OrigOpNumber <
> >     Ops.size())
> >     > +        OrigOpNumber++;
> >     > +      assert((OrigOpNumber != Ops.size()) &&
> >     > +             "expected to find TokenFactor Operand");
> >     > +      // Re-mark worklist from OrigOpNumber to OpNumber
> >     > +      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
> >     > +        if (Worklist[i].second == OrigOpNumber) {
> >     > +          Worklist[i].second = OpNumber;
> >     > +        }
> >     > +      }
> >     > +      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
> >     > +      OpWorkCount[OrigOpNumber] = 0;
> >     > +      NumLeftToConsider--;
> >     > +    }
> >     > +    // Add if it's a new chain
> >     > +    if (SeenChains.insert(Op).second) {
> >     > +      OpWorkCount[OpNumber]++;
> >     > +      Worklist.push_back(std::make_pair(Op, OpNumber));
> >     > +    }
> >     > +  };
> >     > +
> >     > +  for (unsigned i = 0; i < Worklist.size(); ++i) {
> >     > +    // We need at least be consider at least 2 Ops to prune.
> >     > +    if (NumLeftToConsider <= 1)
> >     > +      break;
> >     > +    auto CurNode = Worklist[i].first;
> >     > +    auto CurOpNumber = Worklist[i].second;
> >     > +    assert((OpWorkCount[CurOpNumber] > 0) &&
> >     > +           "Node should not appear in worklist");
> >     > +    switch (CurNode->getOpcode()) {
> >     > +    case ISD::EntryToken:
> >     > +      // Hitting EntryToken is the only way for the search to
> >     terminate without
> >     > +      // hitting
> >     > +      // another operand's search. Prevent us from marking this
> >     operand
> >     > +      // considered.
> >     > +      NumLeftToConsider++;
> >     > +      break;
> >     > +    case ISD::TokenFactor:
> >     > +      for (const SDValue &Op : CurNode->op_values())
> >     > +        AddToWorklist(i, Op.getNode(), CurOpNumber);
> >     > +      break;
> >     > +    case ISD::CopyFromReg:
> >     > +    case ISD::CopyToReg:
> >     > +      AddToWorklist(i, CurNode->getOperand(0).getNode(),
> >     CurOpNumber);
> >     > +      break;
> >     > +    default:
> >     > +      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
> >     > +        AddToWorklist(i, MemNode->getChain().getNode(),
> CurOpNumber);
> >     > +      break;
> >     > +    }
> >     > +    OpWorkCount[CurOpNumber]--;
> >     > +    if (OpWorkCount[CurOpNumber] == 0)
> >     > +      NumLeftToConsider--;
> >     > +  }
> >     > +
> >     >    SDValue Result;
> >     >
> >     >    // If we've changed things around then replace token factor.
> >     > @@ -1636,15 +1699,22 @@ SDValue DAGCombiner::visitTokenFactor(SD
> >     >        // The entry token is the only possible outcome.
> >     >        Result = DAG.getEntryNode();
> >     >      } else {
> >     > -      // New and improved token factor.
> >     > -      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
> >     MVT::Other, Ops);
> >     > +      if (DidPruneOps) {
> >     > +        SmallVector<SDValue, 8> PrunedOps;
> >     > +        //
> >     > +        for (const SDValue &Op : Ops) {
> >     > +          if (SeenChains.count(Op.getNode()) == 0)
> >     > +            PrunedOps.push_back(Op);
> >     > +        }
> >     > +        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
> >     MVT::Other, PrunedOps);
> >     > +      } else {
> >     > +        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
> >     MVT::Other, Ops);
> >     > +      }
> >     >      }
> >     >
> >     > -    // Add users to worklist if AA is enabled, since it may
> introduce
> >     > -    // a lot of new chained token factors while removing memory
> deps.
> >     > -    bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> >     > -      : DAG.getSubtarget().useAA();
> >     > -    return CombineTo(N, Result, UseAA /*add to worklist*/);
> >     > +    // Add users to worklist, since we may introduce a lot of new
> >     > +    // chained token factors while removing memory deps.
> >     > +    return CombineTo(N, Result, true /*add to worklist*/);
> >     >    }
> >     >
> >     >    return Result;
> >     > @@ -6583,6 +6653,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNo
> >     >    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL,
> >     MVT::Other, Chains);
> >     >    SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT,
> >     Loads);
> >     >
> >     > +  // Simplify TF.
> >     > +  AddToWorklist(NewChain.getNode());
> >     > +
> >     >    CombineTo(N, NewValue);
> >     >
> >     >    // Replace uses of the original load (before extension)
> >     > @@ -10761,11 +10834,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N
> >     >    // TODO: Handle TRUNCSTORE/LOADEXT
> >     >    if (OptLevel != CodeGenOpt::None &&
> >     >        ISD::isNormalLoad(N) && !LD->isVolatile()) {
> >     > +    // We can forward a direct store or a store off of a
> tokenfactor.
> >     >      if (ISD::isNON_TRUNCStore(Chain.getNode())) {
> >     >        StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
> >     >        if (PrevST->getBasePtr() == Ptr &&
> >     >            PrevST->getValue().getValueType() == N->getValueType(0))
> >     > -      return CombineTo(N, Chain.getOperand(1), Chain);
> >     > +        return CombineTo(N, PrevST->getOperand(1), Chain);
> >     >      }
> >     >    }
> >     >
> >     > @@ -10783,14 +10857,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N
> >     >      }
> >     >    }
> >     >
> >     > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> >     > -                                                  :
> >     DAG.getSubtarget().useAA();
> >     > -#ifndef NDEBUG
> >     > -  if (CombinerAAOnlyFunc.getNumOccurrences() &&
> >     > -      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
> >     > -    UseAA = false;
> >     > -#endif
> >     > -  if (UseAA && LD->isUnindexed()) {
> >     > +  if (LD->isUnindexed()) {
> >     >      // Walk up chain skipping non-aliasing memory nodes.
> >     >      SDValue BetterChain = FindBetterChain(N, Chain);
> >     >
> >     > @@ -11372,6 +11439,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N)
> >     >    SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD),
> >     MVT::Other,
> >     >                                ArgChains);
> >     >    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
> >     > +  AddToWorklist(Chain.getNode());
> >     >    return true;
> >     >  }
> >     >
> >     > @@ -11765,20 +11833,6 @@ bool DAGCombiner::isMulAddWithConstProfi
> >     >    return false;
> >     >  }
> >     >
> >     > -SDValue DAGCombiner::getMergedConstantVectorStore(
> >     > -    SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink>
> Stores,
> >     > -    SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
> >     > -  SmallVector<SDValue, 8> BuildVector;
> >     > -
> >     > -  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E;
> ++I) {
> >     > -    StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
> >     > -    Chains.push_back(St->getChain());
> >     > -    BuildVector.push_back(St->getValue());
> >     > -  }
> >     > -
> >     > -  return DAG.getBuildVector(Ty, SL, BuildVector);
> >     > -}
> >     > -
> >     >  bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
> >     >                    SmallVectorImpl<MemOpLink> &StoreNodes, EVT
> MemVT,
> >     >                    unsigned NumStores, bool IsConstantSrc, bool
> >     UseVector) {
> >     > @@ -11787,22 +11841,8 @@ bool DAGCombiner::MergeStoresOfConstants
> >     >      return false;
> >     >
> >     >    int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
> >     > -  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> >     > -  unsigned LatestNodeUsed = 0;
> >     > -
> >     > -  for (unsigned i=0; i < NumStores; ++i) {
> >     > -    // Find a chain for the new wide-store operand. Notice that
> some
> >     > -    // of the store nodes that we found may not be selected for
> >     inclusion
> >     > -    // in the wide store. The chain we use needs to be the chain
> >     of the
> >     > -    // latest store node which is *used* and replaced by the wide
> >     store.
> >     > -    if (StoreNodes[i].SequenceNum <
> >     StoreNodes[LatestNodeUsed].SequenceNum)
> >     > -      LatestNodeUsed = i;
> >     > -  }
> >     > -
> >     > -  SmallVector<SDValue, 8> Chains;
> >     >
> >     >    // The latest Node in the DAG.
> >     > -  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
> >     >    SDLoc DL(StoreNodes[0].MemNode);
> >     >
> >     >    SDValue StoredVal;
> >     > @@ -11818,7 +11858,18 @@ bool DAGCombiner::MergeStoresOfConstants
> >     >      assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
> >     >
> >     >      if (IsConstantSrc) {
> >     > -      StoredVal = getMergedConstantVectorStore(DAG, DL,
> >     StoreNodes, Chains, Ty);
> >     > +      SmallVector<SDValue, 8> BuildVector;
> >     > +      for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E;
> >     ++I) {
> >     > +        StoreSDNode *St =
> cast<StoreSDNode>(StoreNodes[I].MemNode);
> >     > +        SDValue Val = St->getValue();
> >     > +        if (MemVT.getScalarType().isInteger())
> >     > +          if (auto *CFP =
> dyn_cast<ConstantFPSDNode>(St->getValue()))
> >     > +            Val = DAG.getConstant(
> >     > +
> >     (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
> >     > +                SDLoc(CFP), MemVT);
> >     > +        BuildVector.push_back(Val);
> >     > +      }
> >     > +      StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
> >     >      } else {
> >     >        SmallVector<SDValue, 8> Ops;
> >     >        for (unsigned i = 0; i < NumStores; ++i) {
> >     > @@ -11828,7 +11879,6 @@ bool DAGCombiner::MergeStoresOfConstants
> >     >          if (Val.getValueType() != MemVT)
> >     >            return false;
> >     >          Ops.push_back(Val);
> >     > -        Chains.push_back(St->getChain());
> >     >        }
> >     >
> >     >        // Build the extracted vector elements back into a vector.
> >     > @@ -11848,7 +11898,6 @@ bool DAGCombiner::MergeStoresOfConstants
> >     >      for (unsigned i = 0; i < NumStores; ++i) {
> >     >        unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
> >     >        StoreSDNode *St  =
> cast<StoreSDNode>(StoreNodes[Idx].MemNode);
> >     > -      Chains.push_back(St->getChain());
> >     >
> >     >        SDValue Val = St->getValue();
> >     >        StoreInt <<= ElementSizeBytes * 8;
> >     > @@ -11866,54 +11915,36 @@ bool DAGCombiner::MergeStoresOfConstants
> >     >      StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
> >     >    }
> >     >
> >     > -  assert(!Chains.empty());
> >     > +  SmallVector<SDValue, 8> Chains;
> >     > +
> >     > +  // Gather all Chains we're inheriting. As generally all chains
> are
> >     > +  // equal, do minor check to remove obvious redundancies.
> >     > +  Chains.push_back(StoreNodes[0].MemNode->getChain());
> >     > +  for (unsigned i = 1; i < NumStores; ++i)
> >     > +    if (StoreNodes[0].MemNode->getChain() !=
> >     StoreNodes[i].MemNode->getChain())
> >     > +      Chains.push_back(StoreNodes[i].MemNode->getChain());
> >     >
> >     > +  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> >     >    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL,
> >     MVT::Other, Chains);
> >     >    SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
> >     >                                    FirstInChain->getBasePtr(),
> >     >                                    FirstInChain->getPointerInfo(),
> >     >                                    FirstInChain->getAlignment());
> >     >
> >     > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> >     > -                                                  :
> >     DAG.getSubtarget().useAA();
> >     > -  if (UseAA) {
> >     > -    // Replace all merged stores with the new store.
> >     > -    for (unsigned i = 0; i < NumStores; ++i)
> >     > -      CombineTo(StoreNodes[i].MemNode, NewStore);
> >     > -  } else {
> >     > -    // Replace the last store with the new store.
> >     > -    CombineTo(LatestOp, NewStore);
> >     > -    // Erase all other stores.
> >     > -    for (unsigned i = 0; i < NumStores; ++i) {
> >     > -      if (StoreNodes[i].MemNode == LatestOp)
> >     > -        continue;
> >     > -      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
> >     > -      // ReplaceAllUsesWith will replace all uses that existed
> >     when it was
> >     > -      // called, but graph optimizations may cause new ones to
> >     appear. For
> >     > -      // example, the case in pr14333 looks like
> >     > -      //
> >     > -      //  St's chain -> St -> another store -> X
> >     > -      //
> >     > -      // And the only difference from St to the other store is
> >     the chain.
> >     > -      // When we change it's chain to be St's chain they become
> >     identical,
> >     > -      // get CSEed and the net result is that X is now a use of
> St.
> >     > -      // Since we know that St is redundant, just iterate.
> >     > -      while (!St->use_empty())
> >     > -        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
> >     > -      deleteAndRecombine(St);
> >     > -    }
> >     > -  }
> >     > +  // Replace all merged stores with the new store.
> >     > +  for (unsigned i = 0; i < NumStores; ++i)
> >     > +    CombineTo(StoreNodes[i].MemNode, NewStore);
> >     >
> >     > -  StoreNodes.erase(StoreNodes.begin() + NumStores,
> StoreNodes.end());
> >     > +  AddToWorklist(NewChain.getNode());
> >     >    return true;
> >     >  }
> >     >
> >     > -void DAGCombiner::getStoreMergeAndAliasCandidates(
> >     > -    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
> >     > -    SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
> >     > +void DAGCombiner::getStoreMergeCandidates(
> >     > +    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
> >     >    // This holds the base pointer, index, and the offset in bytes
> >     from the base
> >     >    // pointer.
> >     >    BaseIndexOffset BasePtr =
> >     BaseIndexOffset::match(St->getBasePtr(), DAG);
> >     > +  EVT MemVT = St->getMemoryVT();
> >     >
> >     >    // We must have a base and an offset.
> >     >    if (!BasePtr.Base.getNode())
> >     > @@ -11923,104 +11954,70 @@ void DAGCombiner::getStoreMergeAndAliasC
> >     >    if (BasePtr.Base.isUndef())
> >     >      return;
> >     >
> >     > -  // Walk up the chain and look for nodes with offsets from the
> same
> >     > -  // base pointer. Stop when reaching an instruction with a
> >     different kind
> >     > -  // or instruction which has a different base pointer.
> >     > -  EVT MemVT = St->getMemoryVT();
> >     > -  unsigned Seq = 0;
> >     > -  StoreSDNode *Index = St;
> >     > -
> >     > -
> >     > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> >     > -                                                  :
> >     DAG.getSubtarget().useAA();
> >     > -
> >     > -  if (UseAA) {
> >     > -    // Look at other users of the same chain. Stores on the same
> >     chain do not
> >     > -    // alias. If combiner-aa is enabled, non-aliasing stores are
> >     canonicalized
> >     > -    // to be on the same chain, so don't bother looking at
> >     adjacent chains.
> >     > -
> >     > -    SDValue Chain = St->getChain();
> >     > -    for (auto I = Chain->use_begin(), E = Chain->use_end(); I !=
> >     E; ++I) {
> >     > -      if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
> >     > -        if (I.getOperandNo() != 0)
> >     > -          continue;
> >     > -
> >     > -        if (OtherST->isVolatile() || OtherST->isIndexed())
> >     > -          continue;
> >     > -
> >     > -        if (OtherST->getMemoryVT() != MemVT)
> >     > -          continue;
> >     > -
> >     > -        BaseIndexOffset Ptr =
> >     BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
> >     > -
> >     > -        if (Ptr.equalBaseIndex(BasePtr))
> >     > -          StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset,
> >     Seq++));
> >     > -      }
> >     > -    }
> >     > -
> >     > -    return;
> >     > -  }
> >     > -
> >     > -  while (Index) {
> >     > -    // If the chain has more than one use, then we can't reorder
> >     the mem ops.
> >     > -    if (Index != St && !SDValue(Index, 0)->hasOneUse())
> >     > -      break;
> >     > -
> >     > -    // Find the base pointer and offset for this memory node.
> >     > -    BaseIndexOffset Ptr =
> >     BaseIndexOffset::match(Index->getBasePtr(), DAG);
> >     > -
> >     > -    // Check that the base pointer is the same as the original
> one.
> >     > -    if (!Ptr.equalBaseIndex(BasePtr))
> >     > -      break;
> >     > +  // We looking for a root node which is an ancestor to all
> mergable
> >     > +  // stores. We search up through a load, to our root and then
> down
> >     > +  // through all children. For instance we will find Store{1,2,3}
> if
> >     > +  // St is Store1, Store2. or Store3 where the root is not a load
> >     > +  // which always true for nonvolatile ops. TODO: Expand
> >     > +  // the search to find all valid candidates through multiple
> >     layers of loads.
> >     > +  //
> >     > +  // Root
> >     > +  // |-------|-------|
> >     > +  // Load    Load    Store3
> >     > +  // |       |
> >     > +  // Store1   Store2
> >     > +  //
> >     > +  // FIXME: We should be able to climb and
> >     > +  // descend TokenFactors to find candidates as well.
> >     >
> >     > -    // The memory operands must not be volatile.
> >     > -    if (Index->isVolatile() || Index->isIndexed())
> >     > -      break;
> >     > +  SDNode *RootNode = (St->getChain()).getNode();
> >     >
> >     > -    // No truncation.
> >     > -    if (Index->isTruncatingStore())
> >     > -      break;
> >     > +  // Set of Parents of Candidates
> >     > +  std::set<SDNode *> CandidateParents;
> >     >
> >     > -    // The stored memory type must be the same.
> >     > -    if (Index->getMemoryVT() != MemVT)
> >     > -      break;
> >     > -
> >     > -    // We do not allow under-aligned stores in order to prevent
> >     > -    // overriding stores. NOTE: this is a bad hack. Alignment
> SHOULD
> >     > -    // be irrelevant here; what MATTERS is that we not move memory
> >     > -    // operations that potentially overlap past each-other.
> >     > -    if (Index->getAlignment() < MemVT.getStoreSize())
> >     > -      break;
> >     > +  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
> >     > +    RootNode = Ldn->getChain().getNode();
> >     > +    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
> >     I != E; ++I)
> >     > +      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk
> >     down chain
> >     > +        CandidateParents.insert(*I);
> >     > +  } else
> >     > +    CandidateParents.insert(RootNode);
> >     >
> >     > -    // We found a potential memory operand to merge.
> >     > -    StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
> >     > +  bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
> >     > +  bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
> >     > +                       isa<ConstantFPSDNode>(St->getValue());
> >     > +  bool IsExtractVecSrc =
> >     > +      (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
> >     > +       St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
> >     > +  auto CorrectValueKind = [&](StoreSDNode *Other) -> bool {
> >     > +    if (IsLoadSrc)
> >     > +      return isa<LoadSDNode>(Other->getValue());
> >     > +    if (IsConstantSrc)
> >     > +      return (isa<ConstantSDNode>(Other->getValue()) ||
> >     > +              isa<ConstantFPSDNode>(Other->getValue()));
> >     > +    if (IsExtractVecSrc)
> >     > +      return (Other->getValue().getOpcode() ==
> >     ISD::EXTRACT_VECTOR_ELT ||
> >     > +              Other->getValue().getOpcode() ==
> >     ISD::EXTRACT_SUBVECTOR);
> >     > +    return false;
> >     > +  };
> >     >
> >     > -    // Find the next memory operand in the chain. If the next
> >     operand in the
> >     > -    // chain is a store then move up and continue the scan with
> >     the next
> >     > -    // memory operand. If the next operand is a load save it and
> >     use alias
> >     > -    // information to check if it interferes with anything.
> >     > -    SDNode *NextInChain = Index->getChain().getNode();
> >     > -    while (1) {
> >     > -      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
> >     > -        // We found a store node. Use it for the next iteration.
> >     > -        Index = STn;
> >     > -        break;
> >     > -      } else if (LoadSDNode *Ldn =
> >     dyn_cast<LoadSDNode>(NextInChain)) {
> >     > -        if (Ldn->isVolatile()) {
> >     > -          Index = nullptr;
> >     > -          break;
> >     > +  // check all parents of mergable children
> >     > +  for (auto P = CandidateParents.begin(); P !=
> >     CandidateParents.end(); ++P)
> >     > +    for (auto I = (*P)->use_begin(), E = (*P)->use_end(); I != E;
> >     ++I)
> >     > +      if (I.getOperandNo() == 0)
> >     > +        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
> >     > +          if (OtherST->isVolatile() || OtherST->isIndexed())
> >     > +            continue;
> >     > +          // We can merge constant floats to equivalent integers
> >     > +          if (OtherST->getMemoryVT() != MemVT)
> >     > +            if (!(MemVT.isInteger() &&
> >     MemVT.bitsEq(OtherST->getMemoryVT()) &&
> >     > +                  isa<ConstantFPSDNode>(OtherST->getValue())))
> >     > +              continue;
> >     > +          BaseIndexOffset Ptr =
> >     > +              BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
> >     > +          if (Ptr.equalBaseIndex(BasePtr) &&
> >     CorrectValueKind(OtherST))
> >     > +            StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
> >     >          }
> >     > -
> >     > -        // Save the load node for later. Continue the scan.
> >     > -        AliasLoadNodes.push_back(Ldn);
> >     > -        NextInChain = Ldn->getChain().getNode();
> >     > -        continue;
> >     > -      } else {
> >     > -        Index = nullptr;
> >     > -        break;
> >     > -      }
> >     > -    }
> >     > -  }
> >     >  }
> >     >
> >     >  // We need to check that merging these stores does not cause a
> loop
> >     > @@ -12047,8 +12044,7 @@ bool DAGCombiner::checkMergeStoreCandida
> >     >    return true;
> >     >  }
> >     >
> >     > -bool DAGCombiner::MergeConsecutiveStores(
> >     > -    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
> >     > +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
> >     >    if (OptLevel == CodeGenOpt::None)
> >     >      return false;
> >     >
> >     > @@ -12082,145 +12078,136 @@ bool
> DAGCombiner::MergeConsecutiveStores
> >     >    if (MemVT.isVector() && IsLoadSrc)
> >     >      return false;
> >     >
> >     > -  // Only look at ends of store sequences.
> >     > -  SDValue Chain = SDValue(St, 0);
> >     > -  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() ==
> >     ISD::STORE)
> >     > -    return false;
> >     > -
> >     > -  // Save the LoadSDNodes that we find in the chain.
> >     > -  // We need to make sure that these nodes do not interfere with
> >     > -  // any of the store nodes.
> >     > -  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
> >     > -
> >     > -  getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
> >     > +  SmallVector<MemOpLink, 8> StoreNodes;
> >     > +  // Find potential store merge candidates by searching through
> >     chain sub-DAG
> >     > +  getStoreMergeCandidates(St, StoreNodes);
> >     >
> >     >    // Check if there is anything to merge.
> >     >    if (StoreNodes.size() < 2)
> >     >      return false;
> >     >
> >     > -  // only do dependence check in AA case
> >     > -  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
> >     > -                                                  :
> >     DAG.getSubtarget().useAA();
> >     > -  if (UseAA &&
> !checkMergeStoreCandidatesForDependencies(StoreNodes))
> >     > +  // Check that we can merge these candidates without causing a
> cycle
> >     > +  if (!checkMergeStoreCandidatesForDependencies(StoreNodes))
> >     >      return false;
> >     >
> >     >    // Sort the memory operands according to their distance from the
> >     > -  // base pointer.  As a secondary criteria: make sure stores
> coming
> >     > -  // later in the code come first in the list. This is important
> for
> >     > -  // the non-UseAA case, because we're merging stores into the
> FINAL
> >     > -  // store along a chain which potentially contains aliasing
> stores.
> >     > -  // Thus, if there are multiple stores to the same address, the
> last
> >     > -  // one can be considered for merging but not the others.
> >     > +  // base pointer.
> >     >    std::sort(StoreNodes.begin(), StoreNodes.end(),
> >     >              [](MemOpLink LHS, MemOpLink RHS) {
> >     > -    return LHS.OffsetFromBase < RHS.OffsetFromBase ||
> >     > -           (LHS.OffsetFromBase == RHS.OffsetFromBase &&
> >     > -            LHS.SequenceNum < RHS.SequenceNum);
> >     > -  });
> >     > +              return LHS.OffsetFromBase < RHS.OffsetFromBase;
> >     > +            });
> >     >
> >     >    // Scan the memory operations on the chain and find the first
> >     non-consecutive
> >     >    // store memory address.
> >     > -  unsigned LastConsecutiveStore = 0;
> >     > +  unsigned NumConsecutiveStores = 0;
> >     >    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
> >     > -  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
> >     > -
> >     > -    // Check that the addresses are consecutive starting from the
> >     second
> >     > -    // element in the list of stores.
> >     > -    if (i > 0) {
> >     > -      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
> >     > -      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
> >     > -        break;
> >     > -    }
> >     >
> >     > -    // Check if this store interferes with any of the loads that
> >     we found.
> >     > -    // If we find a load that alias with this store. Stop the
> >     sequence.
> >     > -    if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
> >     > -          return isAlias(Ldn, StoreNodes[i].MemNode);
> >     > -        }))
> >     > +  // Check that the addresses are consecutive starting from the
> >     second
> >     > +  // element in the list of stores.
> >     > +  for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
> >     > +    int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
> >     > +    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
> >     >        break;
> >     > -
> >     > -    // Mark this node as useful.
> >     > -    LastConsecutiveStore = i;
> >     > +    NumConsecutiveStores = i + 1;
> >     >    }
> >     >
> >     > +  if (NumConsecutiveStores < 2)
> >     > +    return false;
> >     > +
> >     >    // The node with the lowest store address.
> >     > -  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> >     > -  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
> >     > -  unsigned FirstStoreAlign = FirstInChain->getAlignment();
> >     >    LLVMContext &Context = *DAG.getContext();
> >     >    const DataLayout &DL = DAG.getDataLayout();
> >     >
> >     >    // Store the constants into memory as one consecutive store.
> >     >    if (IsConstantSrc) {
> >     > -    unsigned LastLegalType = 0;
> >     > -    unsigned LastLegalVectorType = 0;
> >     > -    bool NonZero = false;
> >     > -    for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
> >     > -      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
> >     > -      SDValue StoredVal = St->getValue();
> >     > -
> >     > -      if (ConstantSDNode *C =
> dyn_cast<ConstantSDNode>(StoredVal)) {
> >     > -        NonZero |= !C->isNullValue();
> >     > -      } else if (ConstantFPSDNode *C =
> >     dyn_cast<ConstantFPSDNode>(StoredVal)) {
> >     > -        NonZero |= !C->getConstantFPValue()->isNullValue();
> >     > -      } else {
> >     > -        // Non-constant.
> >     > -        break;
> >     > -      }
> >     > +    bool RV = false;
> >     > +    while (NumConsecutiveStores > 1) {
> >     > +      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
> >     > +      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
> >     > +      unsigned FirstStoreAlign = FirstInChain->getAlignment();
> >     > +      unsigned LastLegalType = 0;
> >     > +      unsigned LastLegalVectorType = 0;
> >     > +      bool NonZero = false;
> >     > +      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
> >     > +        StoreSDNode *ST =
> cast<StoreSDNode>(StoreNodes[i].MemNode);
> >     > +        SDValue StoredVal = ST->getValue();
> >     > +
> >     > +        if (ConstantSDNode *C =
> >     dyn_cast<ConstantSDNode>(StoredVal)) {
> >     > +          NonZero |= !C->isNullValue();
> >     > +        } else if (ConstantFPSDNode *C =
> >     > +                       dyn_cast<ConstantFPSDNode>(StoredVal)) {
> >     > +          NonZero |= !C->getConstantFPValue()->isNullValue();
> >     > +        } else {
> >     > +          // Non-constant.
> >     > +          break;
> >     > +        }
> >     >
> >     > -      // Find a legal type for the constant store.
> >     > -      unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
> >     > -      EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
> >     > -      bool IsFast;
> >     > -      if (TLI.isTypeLegal(StoreTy) &&
> >     > -          TLI.allowsMemoryAccess(Context, DL, StoreTy,
> FirstStoreAS,
> >     > -                                 FirstStoreAlign, &IsFast) &&
> >     IsFast) {
> >     > -        LastLegalType = i+1;
> >     > -      // Or check whether a truncstore is legal.
> >     > -      } else if (TLI.getTypeAction(Context, StoreTy) ==
> >     > -                 TargetLowering::TypePromoteInteger) {
> >     > -        EVT LegalizedStoredValueTy =
> >     > -          TLI.getTypeToTransformTo(Context,
> >     StoredVal.getValueType());
> >     > -        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy,
> StoreTy) &&
> >     > -            TLI.allowsMemoryAccess(Context, DL,
> >     LegalizedStoredValueTy,
> >     > -                                   FirstStoreAS, FirstStoreAlign,
> >     &IsFast) &&
> >     > +        // Find a legal type for the constant store.
> >     > +        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
> >     > +        EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
> >     > +        bool IsFast = false;
> >     > +        if (TLI.isTypeLegal(StoreTy) &&
> >     > +            TLI.allowsMemoryAccess(Context, DL, StoreTy,
> >     FirstStoreAS,
> >     > +                                   FirstStoreAlign, &IsFast) &&
> >     >              IsFast) {
> >     >            LastLegalType = i + 1;
> >     > +          // Or check whether a truncstore is legal.
> >     > +        } else if (TLI.getTypeAction(Context, StoreTy) ==
> >     > +                   TargetLowering::TypePromoteInteger) {
> >     > +          EVT LegalizedStoredValueTy =
> >     > +              TLI.getTypeToTransformTo(Context,
> >     StoredVal.getValueType());
> >     > +          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy,
> >     StoreTy) &&
> >     > +              TLI.allowsMemoryAccess(Context, DL,
> >     LegalizedStoredValueTy,
> >     > +                                     FirstStoreAS,
> >     FirstStoreAlign, &IsFast) &&
> >     > +              IsFast) {
> >     > +            LastLegalType = i + 1;
> >     > +          }
> >     >          }
> >     > -      }
> >     >
> >     > -      // We only use vectors if the constant is known to be zero
> >     or the target
> >     > -      // allows it and the function is not marked with the
> >     noimplicitfloat
> >     > -      // attribute.
> >     > -      if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT,
> i+1,
> >     > -
> >     FirstStoreAS)) &&
> >     > -          !NoVectors) {
> >     > -
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170303/d3afcf31/attachment.html>


More information about the llvm-commits mailing list