[PATCH] D21692: [DAGCombiner] Fix visitSTORE to continue processing current SDNode, if findBetterNeighborChains doesn't actually CombineTo it.

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 30 16:18:43 PDT 2016


Tim Shen <timshen at google.com> writes:
> timshen created this revision.
> timshen added reviewers: echristo, wschmidt, hfinkel, kbarton,
> amehsan, arsenm, nemanjai, bogner.
> timshen added subscribers: llvm-commits, nemanjai, mehdi_amini.
>
> findBetterNeighborChains may or may not find a better chain for each
> node it finds, which include the node ("St") that visitSTORE is
> currently processing. If no better chain is found for St, visitSTORE
> should continue instead of return SDValue(St, 0), as if it's
> CombinedTo'ed.
>
> This fixes bug 28130. There might be other ways to make the test pass
> (see D21409). I think both of the patches are fixing actual bugs
> revealed by the same testcase.
>
> http://reviews.llvm.org/D21692
>
> Files:
>   lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>   test/CodeGen/PowerPC/pr28130.ll
>
> Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> @@ -393,8 +393,8 @@
>      SDValue FindBetterChain(SDNode *N, SDValue Chain);
>  
>      /// Do FindBetterChain for a store and any possibly adjacent stores on
> -    /// consecutive chains.
> -    bool findBetterNeighborChains(StoreSDNode *St);
> +    /// consecutive chains. Return true if found a better chain for St.
> +    bool findBetterNeighborChains(SDValue &Chain, StoreSDNode *St);

I find this new API very confusing, and the addition you've made to the
comment doesn't even mention what happens with Chain.

Wouldn't it make more sense to update Chain outside of
findBetterNeighbourChains anyway? Unless I'm missing something, this
patch is equivalent to something like this:

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 51f58bd..ab77344 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12097,7 +12097,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       // replaceStoreChain uses CombineTo, which handled all of the worklist
       // manipulation. Return the original node to not do anything else.
       return SDValue(ST, 0);
-    }
+    } else
+      Chain = ST->getChain();
   }
 
   // Try transforming N to an indexed store.
@@ -15007,7 +15008,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
 
     if (Chain != BetterChain) {
-      MadeChange = true;
+      if (ChainedStore == St)
+        MadeChange = true;
       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
     }
   }

>  
>      /// Match "(X shl/srl V1) & V2" where V2 may not be present.
>      bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
> @@ -12086,7 +12086,7 @@
>  
>      // Walk up chain skipping non-aliasing memory nodes, on this store and any
>      // adjacent stores.
> -    if (findBetterNeighborChains(ST)) {
> +    if (findBetterNeighborChains(Chain, ST)) {
>        // replaceStoreChain uses CombineTo, which handled all of the worklist
>        // manipulation. Return the original node to not do anything else.
>        return SDValue(ST, 0);
> @@ -14940,7 +14940,7 @@
>    return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
>  }
>  
> -bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
> +bool DAGCombiner::findBetterNeighborChains(SDValue &Chain, StoreSDNode *St) {
>    // This holds the base pointer, index, and the offset in bytes from the base
>    // pointer.
>    BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
> @@ -15000,15 +15000,16 @@
>      }
>    }
>  
> -  bool MadeChange = false;
> +  bool MadeChangeToSt = false;
>    SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
>  
>    for (StoreSDNode *ChainedStore : ChainedStores) {
>      SDValue Chain = ChainedStore->getChain();
>      SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
>  
>      if (Chain != BetterChain) {
> -      MadeChange = true;
> +      if (ChainedStore == St)
> +        MadeChangeToSt = true;
>        BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
>      }
>    }
> @@ -15018,7 +15019,10 @@
>    for (auto Replacement : BetterChains)
>      replaceStoreChain(Replacement.first, Replacement.second);
>  
> -  return MadeChange;
> +  if (!MadeChangeToSt)
> +    Chain = St->getChain();
> +
> +  return MadeChangeToSt;
>  }
>  
>  /// This is the entry point for the file.
> Index: test/CodeGen/PowerPC/pr28130.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/PowerPC/pr28130.ll
> @@ -0,0 +1,70 @@
> +; RUN: llc -O0 < %s | FileCheck %s
> +target triple = "powerpc64le-unknown-linux-gnu"
> +
> +%StructA = type { double, double, double, double, double, double, double, double }
> +
> +define void @Test(%StructA* %tmp) unnamed_addr #0 align 2 {
> +; CHECK-LABEL: Test:
> +; CHECK: lxvd2x
> +; CHECK-NEXT: xxswapd
> +; CHECK: lxvd2x
> +; CHECK-NEXT: xxswapd
> +; CHECK: lxvd2x
> +; CHECK-NEXT: xxswapd
> +; CHECK: lxvd2x
> +; CHECK-NEXT: xxswapd
> +; CHECK: xxswapd [[OUTPUT:[0-9]+]]
> +; CHECK-NEXT: stxvd2x [[OUTPUT]]
> +bb:
> +  %tmp2 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 0
> +  %tmp4 = bitcast %StructA* %tmp to <2 x double>*
> +  %tmp5 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 2
> +  %tmp9 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 4
> +  %tmp11 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 5
> +  %tmp13 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 6
> +  %tmp15 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 7
> +  %tmp18 = load double, double* %tmp2, align 16
> +  %tmp19 = load double, double* %tmp11, align 8
> +  %tmp20 = load double, double* %tmp9, align 16
> +  %tmp21 = fsub double 1.210000e+04, %tmp20
> +  %tmp22 = fmul double %tmp18, %tmp21
> +  %tmp23 = fadd double %tmp20, %tmp22
> +  %tmp24 = load double, double* %tmp13, align 16
> +  %tmp25 = fsub double 1.000000e+02, %tmp24
> +  %tmp26 = fmul double %tmp18, %tmp25
> +  %tmp27 = fadd double %tmp24, %tmp26
> +  %tmp28 = load double, double* %tmp15, align 8
> +  %tmp29 = insertelement <2 x double> undef, double %tmp19, i32 0
> +  %tmp30 = insertelement <2 x double> %tmp29, double %tmp28, i32 1
> +  %tmp31 = fsub <2 x double> <double 1.100000e+04, double 1.100000e+02>, %tmp30
> +  %tmp32 = insertelement <2 x double> undef, double %tmp18, i32 0
> +  %tmp33 = insertelement <2 x double> %tmp32, double %tmp18, i32 1
> +  %tmp34 = fmul <2 x double> %tmp33, %tmp31
> +  %tmp35 = fadd <2 x double> %tmp30, %tmp34
> +  %tmp36 = bitcast double* %tmp5 to <2 x double>*
> +  %tmp37 = load <2 x double>, <2 x double>* %tmp36, align 16
> +  %tmp38 = fsub <2 x double> <double 1.000000e+00, double 1.000000e+04>, %tmp37
> +  %tmp39 = fmul <2 x double> %tmp33, %tmp38
> +  %tmp40 = fadd <2 x double> %tmp37, %tmp39
> +  %tmp41 = fsub <2 x double> <double 1.000000e+00, double 1.000000e+04>, %tmp40
> +  %tmp42 = fmul <2 x double> %tmp33, %tmp41
> +  %tmp43 = fadd <2 x double> %tmp40, %tmp42
> +  %tmp44 = fsub <2 x double> <double 1.200000e+04, double 1.200000e+02>, %tmp35
> +  %tmp45 = fmul <2 x double> %tmp33, %tmp44
> +  %tmp46 = fadd <2 x double> %tmp35, %tmp45
> +  %tmp48 = fsub double 1.440000e+04, %tmp23
> +  %tmp49 = fmul double %tmp18, %tmp48
> +  %tmp50 = fadd double %tmp23, %tmp49
> +  store double %tmp50, double* %tmp9, align 16
> +  %tmp51 = fsub double 1.000000e+02, %tmp27
> +  %tmp52 = fmul double %tmp18, %tmp51
> +  %tmp53 = fadd double %tmp27, %tmp52
> +  store double %tmp53, double* %tmp13, align 16
> +  %tmp54 = extractelement <2 x double> %tmp46, i32 1
> +  store double %tmp54, double* %tmp15, align 8
> +  %tmp55 = bitcast double* %tmp5 to <2 x double>*
> +  store <2 x double> %tmp43, <2 x double>* %tmp55, align 16
> +  ret void
> +}
> +
> +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }


More information about the llvm-commits mailing list