[PATCH] D60255: [SystemZ] Bugfix in isFusableLoadOpStorePattern().

Jonas Paulsson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 4 01:25:32 PDT 2019


jonpa created this revision.
jonpa added a reviewer: uweigand.

This function is responsible for checking the legality of fusing an instance of load -> op -> store into a single operation. In the SystemZ backend the check was incomplete and a test case emerged with a cycle in the DAG as a result.

Instead of using the NodeIds to determine node relationships, hasPredecessorHelper() now is used just like in the X86 backend. This handled the failing tests and as well gave a few additional transformations on benchmarks.

The SystemZ isFusableLoadOpStorePattern() is a a very near copy of the X86 function, and it seems this could be made a utility function in common code instead.


https://reviews.llvm.org/D60255

Files:
  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
  test/CodeGen/SystemZ/int-uadd-12.ll


Index: test/CodeGen/SystemZ/int-uadd-12.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/int-uadd-12.ll
@@ -0,0 +1,34 @@
+; Test that this test case does not abort after the folding of load -> add ->
+; store into an alsi. This folding is suppose to not happen as it would
+; introduce a loop in the DAG.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-basicaa -consthoist-gep | FileCheck %s
+
+ at g_295 = external dso_local unnamed_addr global i32, align 4
+ at g_672 = external dso_local unnamed_addr global i64, align 8
+ at g_1484 = external dso_local global <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, align 2
+
+define void @fun() {
+; CHECK-LABEL: fun:
+
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  store i32 2, i32* getelementptr inbounds (<{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>* @g_1484, i64 0, i32 2, i32 16)
+  %tmp = icmp slt i32 undef, 3
+  br i1 %tmp, label %bb1, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %tmp3 = load i32, i32* getelementptr inbounds (<{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>* @g_1484, i64 0, i32 2, i32 28)
+  %tmp4 = load i64, i64* @g_672
+  %tmp5 = add i64 %tmp4, 1
+  store i64 %tmp5, i64* @g_672
+  %tmp6 = icmp eq i64 %tmp5, 0
+  %tmp7 = zext i1 %tmp6 to i32
+  %tmp8 = icmp ult i32 %tmp3, %tmp7
+  %tmp9 = zext i1 %tmp8 to i32
+  store i32 %tmp9, i32* @g_295
+  ret void
+}
+
Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1275,6 +1275,9 @@
     InputChain = LoadNode->getChain();
   } else if (Chain.getOpcode() == ISD::TokenFactor) {
     SmallVector<SDValue, 4> ChainOps;
+    SmallVector<const SDNode *, 4> LoopWorklist;
+    SmallPtrSet<const SDNode *, 16> Visited;
+    const unsigned int Max = 1024;
     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
       SDValue Op = Chain.getOperand(i);
       if (Op == Load.getValue(1)) {
@@ -1283,28 +1286,26 @@
         ChainOps.push_back(Load.getOperand(0));
         continue;
       }
-
-      // Make sure using Op as part of the chain would not cause a cycle here.
-      // In theory, we could check whether the chain node is a predecessor of
-      // the load. But that can be very expensive. Instead visit the uses and
-      // make sure they all have smaller node id than the load.
-      int LoadId = LoadNode->getNodeId();
-      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
-             UE = UI->use_end(); UI != UE; ++UI) {
-        if (UI.getUse().getResNo() != 0)
-          continue;
-        if (UI->getNodeId() > LoadId)
-          return false;
-      }
-
+      LoopWorklist.push_back(Op.getNode());
       ChainOps.push_back(Op);
     }
 
-    if (ChainCheck)
+    if (ChainCheck) {
+      // Add the other operand of StoredVal to worklist.
+      for (SDValue Op : StoredVal->ops())
+        if (Op.getNode() != LoadNode)
+          LoopWorklist.push_back(Op.getNode());
+
+      // Check if Load is reachable from any of the nodes in the worklist.
+      if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
+                                       true))
+        return false;
+
       // Make a new TokenFactor with all the other input chains except
       // for the load.
       InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
                                    MVT::Other, ChainOps);
+    }
   }
   if (!ChainCheck)
     return false;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D60255.193670.patch
Type: text/x-patch
Size: 4329 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190404/1691e747/attachment-0001.bin>


More information about the llvm-commits mailing list