[llvm] r320505 - [X86] Cleanup type conversion of 64-bit load-store pairs.
Nirav Dave via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 12 10:25:48 PST 2017
Author: niravd
Date: Tue Dec 12 10:25:48 2017
New Revision: 320505
URL: http://llvm.org/viewvc/llvm-project?rev=320505&view=rev
Log:
[X86] Cleanup type conversion of 64-bit load-store pairs.
Summary:
Simplify and generalize the chain handling and the search for 64-bit load-store pairs.
The nontemporal test now converts its 64-bit integer load-store pair into an f64 load-store, which is realized directly (as a single movsd) instead of being split into two i32 pairs.
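
For readers skimming the first hunk, the combine after this change reduces to roughly the sketch below, condensed from the diff. The helper name and the elision of the surrounding legality checks (Subtarget.is64Bit() || F64IsLegal, volatility, use counts) are illustrative only, not the actual shape of combineStore:

// A minimal sketch of the simplified i64 load/store combine after this
// patch. Assumes the usual SelectionDAG context from X86ISelLowering.cpp;
// the guards that precede this code in combineStore are elided here.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static SDValue combineI64LoadStoreSketch(StoreSDNode *St, SelectionDAG &DAG,
                                         bool Is64Bit) {
  // The stored value must be a plain (non-extending, non-indexed) load.
  auto *Ld = cast<LoadSDNode>(St->getValue().getNode());
  if (!ISD::isNormalLoad(Ld))
    return SDValue();

  // Re-issue the load with the wider legal type (i64 on 64-bit targets,
  // f64 otherwise), reusing the original load's memory operand.
  MVT LdVT = Is64Bit ? MVT::i64 : MVT::f64;
  SDLoc LdDL(Ld), StDL(St);
  SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
                              Ld->getMemOperand());

  // makeEquivalentMemoryOrdering splices the new load into the old load's
  // chain, replacing the manual TokenFactor bookkeeping the removed code
  // performed, so the store can take St->getChain() directly.
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
                      St->getMemOperand());
}

The same simplification carries over to the 32-bit fallback path in the second hunk: once makeEquivalentMemoryOrdering has ordered LoLd and HiLd, both stores can chain from St->getChain() directly and the TokenFactorIndex machinery disappears.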
Reviewers: craig.topper, spatel
Reviewed By: craig.topper
Subscribers: hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D40918
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/nontemporal.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=320505&r1=320504&r2=320505&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 12 10:25:48 2017
@@ -34474,28 +34474,10 @@ static SDValue combineStore(SDNode *N, S
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
- SDNode* LdVal = St->getValue().getNode();
- LoadSDNode *Ld = nullptr;
- int TokenFactorIndex = -1;
+ LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());
SmallVector<SDValue, 8> Ops;
- SDNode* ChainVal = St->getChain().getNode();
- // Must be a store of a load. We currently handle two cases: the load
- // is a direct child, and it's under an intervening TokenFactor. It is
- // possible to dig deeper under nested TokenFactors.
- if (ChainVal == LdVal)
- Ld = cast<LoadSDNode>(St->getChain());
- else if (St->getValue().hasOneUse() &&
- ChainVal->getOpcode() == ISD::TokenFactor) {
- for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
- if (ChainVal->getOperand(i).getNode() == LdVal) {
- TokenFactorIndex = i;
- Ld = cast<LoadSDNode>(St->getValue());
- } else
- Ops.push_back(ChainVal->getOperand(i));
- }
- }
- if (!Ld || !ISD::isNormalLoad(Ld))
+ if (!ISD::isNormalLoad(Ld))
return SDValue();
// If this is not the MMX case, i.e. we are just turning i64 load/store
@@ -34512,17 +34494,12 @@ static SDValue combineStore(SDNode *N, S
if (Subtarget.is64Bit() || F64IsLegal) {
MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->getAlignment(),
- Ld->getMemOperand()->getFlags());
+ Ld->getMemOperand());
+
// Make sure new load is placed in same chain order.
- SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
- if (TokenFactorIndex >= 0) {
- Ops.push_back(NewChain);
- NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
- }
- return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
- St->getMemOperand()->getFlags());
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
+ return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
+ St->getMemOperand());
}
// Otherwise, lower to two pairs of 32-bit loads / stores.
@@ -34537,23 +34514,19 @@ static SDValue combineStore(SDNode *N, S
MinAlign(Ld->getAlignment(), 4),
Ld->getMemOperand()->getFlags());
// Make sure new loads are placed in same chain order.
- SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
- NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
-
- if (TokenFactorIndex >= 0) {
- Ops.push_back(NewChain);
- NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
- }
+ DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
+ DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
LoAddr = St->getBasePtr();
HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, StDL);
SDValue LoSt =
- DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getPointerInfo(),
+ DAG.getStore(St->getChain(), StDL, LoLd, LoAddr, St->getPointerInfo(),
St->getAlignment(), St->getMemOperand()->getFlags());
- SDValue HiSt = DAG.getStore(
- NewChain, StDL, HiLd, HiAddr, St->getPointerInfo().getWithOffset(4),
- MinAlign(St->getAlignment(), 4), St->getMemOperand()->getFlags());
+ SDValue HiSt = DAG.getStore(St->getChain(), StDL, HiLd, HiAddr,
+ St->getPointerInfo().getWithOffset(4),
+ MinAlign(St->getAlignment(), 4),
+ St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
}
Modified: llvm/trunk/test/CodeGen/X86/nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal.ll?rev=320505&r1=320504&r2=320505&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal.ll Tue Dec 12 10:25:48 2017
@@ -9,45 +9,42 @@ define i32 @f(<4 x float> %A, i8* %B, <2
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: pushl %edi
; X32-SSE-NEXT: pushl %esi
; X32-SSE-NEXT: andl $-16, %esp
-; X32-SSE-NEXT: movl 76(%ebp), %ecx
+; X32-SSE-NEXT: subl $16, %esp
+; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
; X32-SSE-NEXT: movl 12(%ebp), %eax
-; X32-SSE-NEXT: movdqa 56(%ebp), %xmm3
-; X32-SSE-NEXT: movdqa 40(%ebp), %xmm4
-; X32-SSE-NEXT: movdqa 24(%ebp), %xmm5
-; X32-SSE-NEXT: movl 8(%ebp), %esi
-; X32-SSE-NEXT: movl 80(%ebp), %edx
-; X32-SSE-NEXT: movl (%edx), %edi
+; X32-SSE-NEXT: movdqa 56(%ebp), %xmm4
+; X32-SSE-NEXT: movdqa 40(%ebp), %xmm5
+; X32-SSE-NEXT: movdqa 24(%ebp), %xmm6
+; X32-SSE-NEXT: movl 8(%ebp), %edx
+; X32-SSE-NEXT: movl 80(%ebp), %ecx
+; X32-SSE-NEXT: movl (%ecx), %esi
; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: movntps %xmm0, (%esi)
+; X32-SSE-NEXT: movntps %xmm0, (%edx)
; X32-SSE-NEXT: paddq {{\.LCPI.*}}, %xmm2
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntdq %xmm2, (%esi)
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntdq %xmm2, (%edx)
; X32-SSE-NEXT: addpd {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntpd %xmm1, (%esi)
-; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm5
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntdq %xmm5, (%esi)
-; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm4
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntdq %xmm4, (%esi)
-; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm3
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntdq %xmm3, (%esi)
-; X32-SSE-NEXT: addl (%edx), %edi
-; X32-SSE-NEXT: movntil %eax, (%esi)
-; X32-SSE-NEXT: movl (%edx), %eax
-; X32-SSE-NEXT: movntil %ecx, 4(%esi)
-; X32-SSE-NEXT: movl 72(%ebp), %ecx
-; X32-SSE-NEXT: movntil %ecx, (%esi)
-; X32-SSE-NEXT: addl %edi, %eax
-; X32-SSE-NEXT: addl (%edx), %eax
-; X32-SSE-NEXT: leal -8(%ebp), %esp
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntpd %xmm1, (%edx)
+; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm6
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntdq %xmm6, (%edx)
+; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm5
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntdq %xmm5, (%edx)
+; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm4
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntdq %xmm4, (%edx)
+; X32-SSE-NEXT: addl (%ecx), %esi
+; X32-SSE-NEXT: movntil %eax, (%edx)
+; X32-SSE-NEXT: movl (%ecx), %eax
+; X32-SSE-NEXT: addl %esi, %eax
+; X32-SSE-NEXT: movsd %xmm3, (%edx)
+; X32-SSE-NEXT: addl (%ecx), %eax
+; X32-SSE-NEXT: leal -4(%ebp), %esp
; X32-SSE-NEXT: popl %esi
-; X32-SSE-NEXT: popl %edi
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
@@ -55,45 +52,42 @@ define i32 @f(<4 x float> %A, i8* %B, <2
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: pushl %ebp
; X32-AVX-NEXT: movl %esp, %ebp
-; X32-AVX-NEXT: pushl %edi
; X32-AVX-NEXT: pushl %esi
; X32-AVX-NEXT: andl $-16, %esp
-; X32-AVX-NEXT: movl 76(%ebp), %ecx
+; X32-AVX-NEXT: subl $16, %esp
+; X32-AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; X32-AVX-NEXT: movl 12(%ebp), %eax
-; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm3
-; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm4
-; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm5
-; X32-AVX-NEXT: movl 8(%ebp), %esi
+; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm4
+; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm5
+; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm6
+; X32-AVX-NEXT: movl 8(%ebp), %ecx
; X32-AVX-NEXT: movl 80(%ebp), %edx
-; X32-AVX-NEXT: movl (%edx), %edi
+; X32-AVX-NEXT: movl (%edx), %esi
; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT: vmovntps %xmm0, (%esi)
+; X32-AVX-NEXT: vmovntps %xmm0, (%ecx)
; X32-AVX-NEXT: vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
; X32-AVX-NEXT: vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntpd %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm5, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm4, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm3, %xmm0
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: addl (%edx), %edi
-; X32-AVX-NEXT: movntil %eax, (%esi)
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntpd %xmm0, (%ecx)
+; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm6, %xmm0
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm5, %xmm0
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm4, %xmm0
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: addl (%edx), %esi
+; X32-AVX-NEXT: movntil %eax, (%ecx)
; X32-AVX-NEXT: movl (%edx), %eax
-; X32-AVX-NEXT: movntil %ecx, 4(%esi)
-; X32-AVX-NEXT: movl 72(%ebp), %ecx
-; X32-AVX-NEXT: movntil %ecx, (%esi)
-; X32-AVX-NEXT: addl %edi, %eax
+; X32-AVX-NEXT: addl %esi, %eax
+; X32-AVX-NEXT: vmovsd %xmm3, (%ecx)
; X32-AVX-NEXT: addl (%edx), %eax
-; X32-AVX-NEXT: leal -8(%ebp), %esp
+; X32-AVX-NEXT: leal -4(%ebp), %esp
; X32-AVX-NEXT: popl %esi
-; X32-AVX-NEXT: popl %edi
; X32-AVX-NEXT: popl %ebp
; X32-AVX-NEXT: retl
;