[llvm] r327172 - Correct load-op-store cycle detection analysis

Nirav Dave via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 9 12:58:07 PST 2018


Author: niravd
Date: Fri Mar  9 12:58:07 2018
New Revision: 327172

URL: http://llvm.org/viewvc/llvm-project?rev=327172&view=rev
Log:
Correct load-op-store cycle detection analysis

Add missing cycle dependency checks in load-op-store fusion.

Fixes PR36274.

Reviewers: craig.topper, bogner

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D43154

Added:
    llvm/trunk/test/CodeGen/X86/pr36274.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=327172&r1=327171&r2=327172&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Mar  9 12:58:07 2018
@@ -2109,27 +2109,56 @@ static bool isFusableLoadOpStorePattern(
       LoadNode->getOffset() != StoreNode->getOffset())
     return false;
 
-  // Check if the chain is produced by the load or is a TokenFactor with
-  // the load output chain as an operand. Return InputChain by reference.
+  bool FoundLoad = false;
+  SmallVector<SDValue, 4> ChainOps;
+  SmallVector<const SDNode *, 4> LoopWorklist;
+  SmallPtrSet<const SDNode *, 16> Visited;
+  const unsigned int Max = 1024;
+
+  //  Visualization of Load-Op-Store fusion:
+  // -------------------------
+  // Legend:
+  //    *-lines = Chain operand dependencies.
+  //    |-lines = Normal operand dependencies.
+  //    Dependencies flow down and right. n-suffix references multiple nodes.
+  //
+  //        C                        Xn  C
+  //        *                         *  *
+  //        *                          * *
+  //  Xn  A-LD    Yn                    TF         Yn
+  //   *    * \   |                       *        |
+  //    *   *  \  |                        *       |
+  //     *  *   \ |             =>       A--LD_OP_ST
+  //      * *    \|                                 \
+  //       TF    OP                                  \
+  //         *   | \                                  Zn
+  //          *  |  \
+  //         A-ST    Zn
+  //
+
+  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
+  //                                      #2: Yn -> LD
+  //                                      #3: ST -> Zn
+
+  // Ensure the transform is safe by checking for the dual
+  // dependencies to make sure we do not induce a loop.
+
+  // As LD is a predecessor to both OP and ST we can do this by checking:
+  //  a). if LD is a predecessor to a member of Xn or Yn.
+  //  b). if a Zn is a predecessor to ST.
+
+  // However, (b) can only occur through being a chain predecessor to
+  // ST, which is the same as Zn being a member or predecessor of Xn,
+  // which is a subset of LD being a predecessor of Xn. So it's
+  // subsumed by check (a).
+
   SDValue Chain = StoreNode->getChain();
 
+  // Gather X elements in ChainOps.
   if (Chain == Load.getValue(1)) {
-    InputChain = LoadNode->getChain();
-    return true;
-  }
-
-  if (Chain.getOpcode() == ISD::TokenFactor) {
-    // Fusing Load-Op-Store requires predecessors of store must also
-    // be predecessors of the load. This addition may cause a loop. We
-    // can check this by doing a search for Load in the new
-    // dependencies. As this can be expensive, heuristically prune
-    // this search by visiting the uses and make sure they all have
-    // smaller node id than the load.
-
-    bool FoundLoad = false;
-    SmallVector<SDValue, 4> ChainOps;
-    SmallVector<const SDNode *, 4> LoopWorklist;
-    SmallPtrSet<const SDNode *, 16> Visited;
+    FoundLoad = true;
+    ChainOps.push_back(Load.getOperand(0));
+  } else if (Chain.getOpcode() == ISD::TokenFactor) {
     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
       SDValue Op = Chain.getOperand(i);
       if (Op == Load.getValue(1)) {
@@ -2141,31 +2170,25 @@ static bool isFusableLoadOpStorePattern(
       LoopWorklist.push_back(Op.getNode());
       ChainOps.push_back(Op);
     }
+  }
 
-    if (!FoundLoad)
-      return false;
+  if (!FoundLoad)
+    return false;
 
-    // If Loop Worklist is not empty. Check if we would make a loop.
-    if (!LoopWorklist.empty()) {
-      const unsigned int Max = 8192;
-      // if Load is predecessor to potentially loop inducing chain
-      // dependencies.
-      if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist,
-                                       Max, true))
-        return false;
-      // Fail conservatively if we ended loop search early.
-      if (Visited.size() >= Max)
-        return false;
-    }
+  // Worklist is currently Xn. Add Yn to worklist.
+  for (SDValue Op : StoredVal->ops())
+    if (Op.getNode() != LoadNode)
+      LoopWorklist.push_back(Op.getNode());
+
+  // Check (a) if Load is a predecessor to Xn + Yn
+  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
+                                   true))
+    return false;
 
-    // Make a new TokenFactor with all the other input chains except
-    // for the load.
-    InputChain =
-        CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
-    return true;
+  InputChain =
+      CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
+  return true;
   }
-  return false;
-}
 
 // Change a chain of {load; op; store} of the same value into a simple op
 // through memory of that value, if the uses of the modified value and its

Added: llvm/trunk/test/CodeGen/X86/pr36274.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr36274.ll?rev=327172&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr36274.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr36274.ll Fri Mar  9 12:58:07 2018
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
+
+; This tests is checking for a case where the x86 load-op-store fusion
+; misses a dependence between the fused load and a non-fused operand
+; to the load causing a cycle. Here the dependence in question comes
+; from the carry in input of the adcl.
+
+ at vx = external local_unnamed_addr global <2 x i32>, align 8
+
+define void @pr36274(i32* %somewhere) {
+; CHECK-LABEL: pr36274:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl vx+4, %eax
+; CHECK-NEXT:    addl $1, vx
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    movl %eax, vx+4
+; CHECK-NEXT:    retl
+  %a0  = getelementptr <2 x i32>, <2 x i32>* @vx, i32 0, i32 0
+  %a1  = getelementptr <2 x i32>, <2 x i32>* @vx, i32 0, i32 1
+  %x1  = load volatile i32, i32* %a1, align 4
+  %x0  = load volatile i32, i32* %a0, align 8
+  %vx0 = insertelement <2 x i32> undef, i32 %x0, i32 0
+  %vx1 = insertelement <2 x i32> %vx0, i32 %x1, i32 1
+  %x = bitcast <2 x i32> %vx1 to i64
+  %add = add i64 %x, 1
+  %vadd = bitcast i64 %add to <2 x i32>
+  %vx1_0 = extractelement <2 x i32> %vadd, i32 0
+  %vx1_1 = extractelement <2 x i32> %vadd, i32 1
+  store i32 %vx1_0, i32* %a0, align 8
+  store i32 %vx1_1, i32* %a1, align 4
+  ret void
+}




More information about the llvm-commits mailing list