[PATCH] D16611: Fix CombineToPreIndexedLoadStore O(n^2) behavior

Tim Shen via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 26 16:16:45 PST 2016


timshen created this revision.
timshen added reviewers: echristo, iteratee.
timshen added a subscriber: llvm-commits.

This patch consists of two parts: a performance fix in DAGCombiner.cpp and a correctness fix in SelectionDAG.cpp.

The test case tests the bug that's uncovered by the performance fix, and fixed by the correctness fix.

The performance fix keeps the containers required by the hasPredecessorHelper (which is a lazy DFS) and reuse them. Since hasPredecessorHelper is called in a loop, the overall efficiency reduced from O(n^2) to O(n), where n is the number of SDNodes.

The correctness fix keeps iterating the neighbor list even if it's time to early return. It will return after finishing adding all neighbors to Worklist, so that no neighbors are discarded due to the original early return.

http://reviews.llvm.org/D16611

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  test/CodeGen/PowerPC/combine-to-pre-index-store-crash.ll

Index: test/CodeGen/PowerPC/combine-to-pre-index-store-crash.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/combine-to-pre-index-store-crash.ll
@@ -0,0 +1,20 @@
+; RUN: llc %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-grtev4-linux-gnu"
+
+%StructA = type <{ i64, { i64, i64 }, { i64, i64 } }>
+
+define void @TestFoo(%StructA* %this) {
+  %tmp = getelementptr inbounds %StructA, %StructA* %this, i64 0, i32 1
+  %tmp11 = getelementptr inbounds %StructA, %StructA* %this, i64 0, i32 1, i32 1
+  %tmp12 = bitcast { i64, i64 }* %tmp to i64**
+  store i64* %tmp11, i64** %tmp12
+  call void @TestBar()
+  %tmp13 = getelementptr inbounds %StructA, %StructA* %this, i64 0, i32 2, i32 1
+  store i64* %tmp13, i64** undef
+  %.cast.i.i.i = bitcast i64* %tmp13 to i8*
+  store i8 0, i8* %.cast.i.i.i
+  ret void
+}
+
+declare void @TestBar()
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6950,13 +6950,14 @@
   // Haven't visited N yet. Continue the search.
   while (!Worklist.empty()) {
     const SDNode *M = Worklist.pop_back_val();
+    bool Found = false;
     for (const SDValue &OpV : M->op_values()) {
       SDNode *Op = OpV.getNode();
       if (Visited.insert(Op).second)
         Worklist.push_back(Op);
-      if (Op == N)
-        return true;
+      if (Op == N) Found = true;
     }
+    if (Found) return true;
   }
 
   return false;
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9593,6 +9593,10 @@
       return false;
   }
 
+  // Caches for hasPredecessorHelper
+  SmallPtrSet<const SDNode *, 32> Visited;
+  SmallVector<const SDNode *, 16> Worklist;
+
   // If the offset is a constant, there may be other adds of constants that
   // can be folded with this one. We should do this to avoid having to keep
   // a copy of the original base pointer.
@@ -9607,8 +9611,7 @@
       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
         continue;
 
-      if (Use.getUser()->isPredecessorOf(N))
-        continue;
+      if (N->hasPredecessorHelper(Use.getUser(), Visited, Worklist)) continue;
 
       if (Use.getUser()->getOpcode() != ISD::ADD &&
           Use.getUser()->getOpcode() != ISD::SUB) {
@@ -9637,10 +9640,6 @@
   // Now check for #3 and #4.
   bool RealUse = false;
 
-  // Caches for hasPredecessorHelper
-  SmallPtrSet<const SDNode *, 32> Visited;
-  SmallVector<const SDNode *, 16> Worklist;
-
   for (SDNode *Use : Ptr.getNode()->uses()) {
     if (Use == N)
       continue;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D16611.46070.patch
Type: text/x-patch
Size: 2838 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160127/fa0852ab/attachment.bin>


More information about the llvm-commits mailing list