[llvm] r334908 - [X86] Pass the parent SDNode to X86DAGToDAGISel::selectScalarSSELoad to simplify the hasSingleUseFromRoot handling.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 17 09:29:46 PDT 2018


Author: ctopper
Date: Sun Jun 17 09:29:46 2018
New Revision: 334908

URL: http://llvm.org/viewvc/llvm-project?rev=334908&view=rev
Log:
[X86] Pass the parent SDNode to X86DAGToDAGISel::selectScalarSSELoad to simplify the hasSingleUseFromRoot handling.

Some of the calls to hasSingleUseFromRoot were passing the load itself. If the load's chain result has a user this would count against that. By getting the true parent of the match and ensuring any intermediate between the match and the load have a single use we can avoid this case. isLegalToFold will take care of checking users of the load's data output.

This fixed at least fma-scalar-memfold.ll to succed without the peephole pass.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=334908&r1=334907&r2=334908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Sun Jun 17 09:29:46 2018
@@ -215,7 +215,7 @@ namespace {
     bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
-    bool selectScalarSSELoad(SDNode *Root, SDValue N,
+    bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
                              SDValue &Base, SDValue &Scale,
                              SDValue &Index, SDValue &Disp,
                              SDValue &Segment,
@@ -1685,8 +1685,7 @@ bool X86DAGToDAGISel::selectAddr(SDNode
 // We can only fold a load if all nodes between it and the root node have a
 // single use. If there are additional uses, we could end up duplicating the
 // load.
-static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *N) {
-  SDNode *User = *N->use_begin();
+static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
   while (User != Root) {
     if (!User->hasOneUse())
       return false;
@@ -1703,17 +1702,19 @@ static bool hasSingleUsesFromRoot(SDNode
 /// We also return:
 ///   PatternChainNode: this is the matched node that has a chain input and
 ///   output.
-bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
+bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
                                           SDValue N, SDValue &Base,
                                           SDValue &Scale, SDValue &Index,
                                           SDValue &Disp, SDValue &Segment,
                                           SDValue &PatternNodeWithChain) {
+  if (!hasSingleUsesFromRoot(Root, Parent))
+    return false;
+
   // We can allow a full vector load here since narrowing a load is ok.
   if (ISD::isNON_EXTLoad(N.getNode())) {
     PatternNodeWithChain = N;
     if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
-        hasSingleUsesFromRoot(Root, N.getNode())) {
+        IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
       return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
                         Segment);
@@ -1724,8 +1725,7 @@ bool X86DAGToDAGISel::selectScalarSSELoa
   if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
     PatternNodeWithChain = N;
     if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
-        hasSingleUsesFromRoot(Root, N.getNode())) {
+        IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
       auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
       return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
                         Segment);
@@ -1739,8 +1739,7 @@ bool X86DAGToDAGISel::selectScalarSSELoa
     PatternNodeWithChain = N.getOperand(0);
     if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
         IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
-        hasSingleUsesFromRoot(Root, N.getNode())) {
+        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
       return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
                         Segment);
@@ -1756,8 +1755,7 @@ bool X86DAGToDAGISel::selectScalarSSELoa
     PatternNodeWithChain = N.getOperand(0).getOperand(0);
     if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
         IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
-        hasSingleUsesFromRoot(Root, N.getNode())) {
+        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
       // Okay, this is a zero extending load.  Fold it.
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
       return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=334908&r1=334907&r2=334908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sun Jun 17 09:29:46 2018
@@ -651,10 +651,10 @@ def X86GF2P8mulb        : SDNode<"X86ISD
 // forms.
 def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
                                   [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
-                                   SDNPWantRoot]>;
+                                   SDNPWantRoot, SDNPWantParent]>;
 def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
                                   [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
-                                   SDNPWantRoot]>;
+                                   SDNPWantRoot, SDNPWantParent]>;
 
 def ssmem : Operand<v4f32> {
   let PrintMethod = "printf32mem";

Modified: llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll?rev=334908&r1=334907&r2=334908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll Sun Jun 17 09:29:46 2018
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
-; RUN: llc < %s -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
+; RUN: llc < %s -disable-peephole -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: llc < %s -disable-peephole -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 
 target triple = "x86_64-unknown-unknown"
 




More information about the llvm-commits mailing list