[PATCH] D36938: [X86] When selecting sse_load_f32/f64 pattern, make sure there's only one use of every node all the way back to the root of the match
Craig Topper via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 21 01:06:06 PDT 2017
craig.topper updated this revision to Diff 111936.
craig.topper added a comment.
Reword comment.
https://reviews.llvm.org/D36938
Files:
lib/Target/X86/X86ISelDAGToDAG.cpp
test/CodeGen/X86/avx512-memfold.ll
Index: test/CodeGen/X86/avx512-memfold.ll
===================================================================
--- test/CodeGen/X86/avx512-memfold.ll
+++ test/CodeGen/X86/avx512-memfold.ll
@@ -72,9 +72,10 @@
define <4 x float> @test_mask_add_ss_double_use(<4 x float> %a, float* %b, i8 %mask, <4 x float> %c) {
; CHECK-LABEL: test_mask_add_ss_double_use:
; CHECK: ## BB#0:
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vaddss %xmm2, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vaddss %xmm2, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmulps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%b.val = load float, float* %b
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1538,6 +1538,20 @@
return true;
}
+// We can only fold a load if all nodes between it and the root node have a
+// single use. If there are additional uses, we could end up duplicating the
+// load.
+static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *N) {
+ SDNode *User = *N->use_begin();
+ while (User != Root) {
+ if (!User->hasOneUse())
+ return false;
+ User = *User->use_begin();
+ }
+
+ return true;
+}
+
/// Match a scalar SSE load. In particular, we want to match a load whose top
/// elements are either undef or zeros. The load flavor is derived from the
/// type of N, which is either v4f32 or v2f64.
@@ -1554,7 +1568,8 @@
if (ISD::isNON_EXTLoad(N.getNode())) {
PatternNodeWithChain = N;
if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel)) {
+ IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
+ hasSingleUsesFromRoot(Root, N.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
Segment);
@@ -1565,7 +1580,8 @@
if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
PatternNodeWithChain = N;
if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel)) {
+ IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
+ hasSingleUsesFromRoot(Root, N.getNode())) {
auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
Segment);
@@ -1579,7 +1595,8 @@
PatternNodeWithChain = N.getOperand(0);
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
+ IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
+ hasSingleUsesFromRoot(Root, N.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
Segment);
@@ -1595,7 +1612,8 @@
PatternNodeWithChain = N.getOperand(0).getOperand(0);
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
+ IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
+ hasSingleUsesFromRoot(Root, N.getNode())) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D36938.111936.patch
Type: text/x-patch
Size: 3940 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170821/5c4a6380/attachment.bin>
More information about the llvm-commits
mailing list