[llvm] r287983 - [X86] Add a hasOneUse check to selectScalarSSELoad to keep the same load from being folded multiple times.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 26 09:29:25 PST 2016
Author: ctopper
Date: Sat Nov 26 11:29:25 2016
New Revision: 287983
URL: http://llvm.org/viewvc/llvm-project?rev=287983&view=rev
Log:
[X86] Add a hasOneUse check to selectScalarSSELoad to keep the same load from being folded multiple times.
Summary: When selectScalarSSELoad looks for a scalar_to_vector of a scalar load, it makes sure the load is only used by the scalar_to_vector, but it doesn't make sure the scalar_to_vector itself is only used once. As a result, the same load can be folded multiple times, which is bad for performance. It also duplicates the load's chain output without connecting the copy to anything, so chain dependencies on the load will not be satisfied.
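A hedged sketch of the kind of IR that triggers this, modeled on the double_fold test updated below (the test body is truncated in the diff, so this reconstruction is an assumption): a single load reaches both min.ss and max.ss intrinsics through one insertelement (scalar_to_vector in the DAG).

; Sketch (assumed shape): one load of %x feeds both intrinsics through a
; single insertelement. Before this patch the load could be folded into
; both minss and maxss, duplicating the load and its chain output.
define <4 x float> @double_fold_sketch(float* %x, <4 x float> %y) {
entry:
  %f = load float, float* %x, align 1
  %vec = insertelement <4 x float> undef, float %f, i32 0
  %min = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %y, <4 x float> %vec)
  %max = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %y, <4 x float> %vec)
  %sum = fadd <4 x float> %min, %max
  ret <4 x float> %sum
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)

With the patch, the load is selected once into a movss register load and both minss and maxss read the register, as the updated CHECK lines below show.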
Reviewers: RKSimon, zvi, delena, spatel
Subscribers: andreadb, llvm-commits
Differential Revision: https://reviews.llvm.org/D26790
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=287983&r1=287982&r2=287983&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Sat Nov 26 11:29:25 2016
@@ -1513,12 +1513,15 @@ bool X86DAGToDAGISel::selectScalarSSELoa
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
SDValue &PatternNodeWithChain) {
- if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
+ // once. Otherwise the load might get duplicated and the chain output of the
+ // duplicate load will not be observed by all dependencies.
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
PatternNodeWithChain = N.getOperand(0);
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
PatternNodeWithChain.hasOneUse() &&
- IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
- IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
+ IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
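For contrast, when the scalar_to_vector and the load each have exactly one use, the fold is still expected to fire. A minimal sketch under that assumption (function name hypothetical):

; Sketch (assumption): the load has a single scalar_to_vector user, which in
; turn has a single consumer, so selectScalarSSELoad may still fold the load
; into the instruction itself.
define <4 x float> @single_fold(float* %x, <4 x float> %y) {
entry:
  %f = load float, float* %x, align 4
  %vec = insertelement <4 x float> undef, float %f, i32 0
  %min = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %y, <4 x float> %vec)
  ret <4 x float> %min
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)

On x86-64 this should still select to a folded minss (%rdi), %xmm0, matching the single-use folds already exercised in vec_ss_load_fold.ll.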
Modified: llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll?rev=287983&r1=287982&r2=287983&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll Sat Nov 26 11:29:25 2016
@@ -381,33 +381,37 @@ define <4 x float> @double_fold(float* %
; X32-LABEL: double_fold:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movaps %xmm0, %xmm1
-; X32-NEXT: minss (%eax), %xmm1
-; X32-NEXT: maxss (%eax), %xmm0
-; X32-NEXT: addps %xmm1, %xmm0
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: movaps %xmm0, %xmm2
+; X32-NEXT: minss %xmm1, %xmm2
+; X32-NEXT: maxss %xmm1, %xmm0
+; X32-NEXT: addps %xmm2, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: double_fold:
; X64: ## BB#0: ## %entry
-; X64-NEXT: movaps %xmm0, %xmm1
-; X64-NEXT: minss (%rdi), %xmm1
-; X64-NEXT: maxss (%rdi), %xmm0
-; X64-NEXT: addps %xmm1, %xmm0
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: minss %xmm1, %xmm2
+; X64-NEXT: maxss %xmm1, %xmm0
+; X64-NEXT: addps %xmm2, %xmm0
; X64-NEXT: retq
;
; X32_AVX-LABEL: double_fold:
; X32_AVX: ## BB#0: ## %entry
; X32_AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32_AVX-NEXT: vminss (%eax), %xmm0, %xmm1
-; X32_AVX-NEXT: vmaxss (%eax), %xmm0, %xmm0
-; X32_AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; X32_AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32_AVX-NEXT: vminss %xmm1, %xmm0, %xmm2
+; X32_AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; X32_AVX-NEXT: vaddps %xmm0, %xmm2, %xmm0
; X32_AVX-NEXT: retl
;
; X64_AVX-LABEL: double_fold:
; X64_AVX: ## BB#0: ## %entry
-; X64_AVX-NEXT: vminss (%rdi), %xmm0, %xmm1
-; X64_AVX-NEXT: vmaxss (%rdi), %xmm0, %xmm0
-; X64_AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; X64_AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64_AVX-NEXT: vminss %xmm1, %xmm0, %xmm2
+; X64_AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; X64_AVX-NEXT: vaddps %xmm0, %xmm2, %xmm0
; X64_AVX-NEXT: retq
entry:
%0 = load float, float* %x, align 1