[PATCH] D36938: [X86] When selecting sse_load_f32/f64 pattern, make sure there's only one use of every node all the way back to the root of the match

Phabricator via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 21 09:05:06 PDT 2017


This revision was automatically updated to reflect the committed changes.
Closed by commit rL311342: [X86] When selecting sse_load_f32/f64 pattern, make sure there's only one useā€¦ (authored by ctopper).

Repository:
  rL LLVM

https://reviews.llvm.org/D36938

Files:
  llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/trunk/test/CodeGen/X86/avx512-memfold.ll


Index: llvm/trunk/test/CodeGen/X86/avx512-memfold.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-memfold.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-memfold.ll
@@ -72,9 +72,10 @@
 define <4 x float> @test_mask_add_ss_double_use(<4 x float> %a, float* %b, i8 %mask, <4 x float> %c) {
 ; CHECK-LABEL: test_mask_add_ss_double_use:
 ; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vaddss (%rdi), %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vaddss (%rdi), %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vaddss %xmm2, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vaddss %xmm2, %xmm0, %xmm0 {%k1} {z}
 ; CHECK-NEXT:    vmulps %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %b.val = load float, float* %b
Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1538,6 +1538,20 @@
   return true;
 }
 
+// We can only fold a load if all nodes between it and the root node have a
+// single use. If there are additional uses, we could end up duplicating the
+// load.
+static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *N) {
+  SDNode *User = *N->use_begin();
+  while (User != Root) {
+    if (!User->hasOneUse())
+      return false;
+    User = *User->use_begin();
+  }
+
+  return true;
+}
+
 /// Match a scalar SSE load. In particular, we want to match a load whose top
 /// elements are either undef or zeros. The load flavor is derived from the
 /// type of N, which is either v4f32 or v2f64.
@@ -1554,7 +1568,8 @@
   if (ISD::isNON_EXTLoad(N.getNode())) {
     PatternNodeWithChain = N;
     if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel)) {
+        IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
+        hasSingleUsesFromRoot(Root, N.getNode())) {
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
       return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
                         Segment);
@@ -1565,7 +1580,8 @@
   if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
     PatternNodeWithChain = N;
     if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel)) {
+        IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
+        hasSingleUsesFromRoot(Root, N.getNode())) {
       auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
       return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
                         Segment);
@@ -1579,7 +1595,8 @@
     PatternNodeWithChain = N.getOperand(0);
     if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
         IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
+        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
+        hasSingleUsesFromRoot(Root, N.getNode())) {
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
       return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
                         Segment);
@@ -1595,7 +1612,8 @@
     PatternNodeWithChain = N.getOperand(0).getOperand(0);
     if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
         IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
+        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
+        hasSingleUsesFromRoot(Root, N.getNode())) {
       // Okay, this is a zero extending load.  Fold it.
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
       return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D36938.111991.patch
Type: text/x-patch
Size: 4006 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170821/86186e7c/attachment.bin>


More information about the llvm-commits mailing list