[llvm] 15b6aa7 - [X86] Enable the use of movlps for i64 atomic load on 32-bit targets with sse1.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 23 15:14:25 PST 2020
Author: Craig Topper
Date: 2020-02-23T15:11:38-08:00
New Revision: 15b6aa744881b6e77a3d6773afa3016fc2f9f123
URL: https://github.com/llvm/llvm-project/commit/15b6aa744881b6e77a3d6773afa3016fc2f9f123
DIFF: https://github.com/llvm/llvm-project/commit/15b6aa744881b6e77a3d6773afa3016fc2f9f123.diff
LOG: [X86] Enable the use of movlps for i64 atomic load on 32-bit targets with sse1.
There is still a little room for improvement: we could use movlps to
store to the stack temporary that is needed to move the data out of the
xmm register after the load.
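
For context, a minimal source-level example (an illustration, not part
of the commit) that exercises this path: a 64-bit atomic load compiled
for a 32-bit x86 target that has SSE1 but not SSE2, e.g.
clang -m32 -march=pentium3. With this patch the load can be selected as
XORPS+MOVLPS instead of being expanded to an x87 FILD/FISTP round trip
or a cmpxchg8b loop.

    // build (assumed flags): clang -m32 -march=pentium3 -O2 example.cpp
    #include <atomic>

    std::atomic<long long> Shared;

    long long loadShared() {
      // An i64 atomic load; on SSE1-only 32-bit targets this now lowers
      // to a single aligned 64-bit MOVLPS load, which LLVM relies on
      // being atomic on x86.
      return Shared.load();
    }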
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/atomic-fp.ll
llvm/test/CodeGen/X86/atomic-non-integer.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b7ff255f0881..575eeb68bd2f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27544,7 +27544,7 @@ X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- (Subtarget.hasSSE2() || Subtarget.hasX87()))
+ (Subtarget.hasSSE1() || Subtarget.hasX87()))
return AtomicExpansionKind::None;
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
@@ -29667,15 +29667,27 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Attribute::NoImplicitFloat);
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
auto *Node = cast<AtomicSDNode>(N);
- if (Subtarget.hasSSE2()) {
- // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
- // lower 64-bits.
- SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ if (Subtarget.hasSSE1()) {
+ // Use a VZEXT_LOAD which will be selected as MOVQ or XORPS+MOVLPS.
+ // Then extract the lower 64-bits.
+ MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
+ SDVTList Tys = DAG.getVTList(LdVT, MVT::Other);
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
MVT::i64, Node->getMemOperand());
- SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+ if (Subtarget.hasSSE2()) {
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Ld.getValue(1));
+ return;
+ }
+ // We use an alternative sequence for SSE1 that extracts as v2f32 and
+ // then casts to i64. This avoids a 128-bit stack temporary being
+ // created by type legalization if we were to cast v4f32->v2i64.
+ SDValue Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Ld,
DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getBitcast(MVT::i64, Res);
Results.push_back(Res);
Results.push_back(Ld.getValue(1));
return;
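
At the instruction level, the SSE1 path corresponds roughly to the
following intrinsic sequence (an illustrative analogue only; the C++
below carries no atomic semantics itself -- the atomicity comes from the
single 64-bit MOVLPS load that the VZEXT_LOAD node selects to):

    #include <cstdint>
    #include <cstring>
    #include <xmmintrin.h>

    uint64_t load_i64_via_sse1(const uint64_t *p) {
      __m128 zero = _mm_setzero_ps();                  // xorps %xmm0, %xmm0
      __m128 v = _mm_loadl_pi(zero, (const __m64 *)p); // movlps (%eax), %xmm0
      // Move the low 64 bits out of the register; the generated code does
      // this through a stack temporary, as the updated tests below show.
      uint64_t r;
      std::memcpy(&r, &v, sizeof(r));
      return r;
    }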
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index 01e0480a815b..17b58a581db1 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -107,18 +107,17 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $24, %esp
+; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
-; X86-SSE1-NEXT: fildll (%eax)
-; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl %ecx, (%esp)
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: xorps %xmm1, %xmm1
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT: movss %xmm1, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
@@ -274,17 +273,16 @@ define void @fadd_64g() nounwind {
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $24, %esp
-; X86-SSE1-NEXT: fildll glob64
-; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: subl $16, %esp
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: xorps %xmm1, %xmm1
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT: movss %xmm1, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, glob64
; X86-SSE1-NEXT: movl %ebp, %esp
@@ -438,17 +436,16 @@ define void @fadd_64imm() nounwind {
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $24, %esp
-; X86-SSE1-NEXT: fildll -559038737
-; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: subl $16, %esp
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: xorps %xmm1, %xmm1
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT: movss %xmm1, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, -559038737
; X86-SSE1-NEXT: movl %ebp, %esp
@@ -608,17 +605,16 @@ define void @fadd_64stack() nounwind {
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $32, %esp
-; X86-SSE1-NEXT: fildll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: subl $24, %esp
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: xorps %xmm1, %xmm1
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT: movss %xmm1, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %ebp, %esp
@@ -712,25 +708,22 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE1: # %bb.0: # %bb
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
-; X86-SSE1-NEXT: pushl %esi
; X86-SSE1-NEXT: andl $-8, %esp
-; X86-SSE1-NEXT: subl $32, %esp
+; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 20(%ebp), %eax
; X86-SSE1-NEXT: movl 8(%ebp), %ecx
-; X86-SSE1-NEXT: fildll (%ecx,%eax,8)
-; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl %edx, (%esp)
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: xorps %xmm1, %xmm1
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT: movss %xmm1, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8)
-; X86-SSE1-NEXT: leal -4(%ebp), %esp
-; X86-SSE1-NEXT: popl %esi
+; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
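The movss+shufps+movss triple in the updated CHECK lines above is the
stack-temporary spill that the commit message calls out as remaining
room for improvement; a single MOVLPS store could replace it. A
hypothetical sketch of that follow-up (not part of this patch), again
in intrinsic form:

    #include <xmmintrin.h>

    // Store the low 64 bits of v to the stack temporary in one
    // instruction:
    //   movlps %xmm0, (%esp)
    // instead of movss + shufps + movss.
    void spill_low64(__m128 v, void *tmp) {
      _mm_storel_pi((__m64 *)tmp, v);
    }
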
diff --git a/llvm/test/CodeGen/X86/atomic-non-integer.ll b/llvm/test/CodeGen/X86/atomic-non-integer.ll
index 8b2ed638af2a..664f195e3884 100644
--- a/llvm/test/CodeGen/X86/atomic-non-integer.ll
+++ b/llvm/test/CodeGen/X86/atomic-non-integer.ll
@@ -272,17 +272,16 @@ define float @load_float(float* %fptr) {
define double @load_double(double* %fptr) {
; X86-SSE1-LABEL: load_double:
; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: subl $20, %esp
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 24
+; X86-SSE1-NEXT: subl $12, %esp
+; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: fildll (%eax)
-; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
-; X86-SSE1-NEXT: addl $20, %esp
+; X86-SSE1-NEXT: addl $12, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
@@ -660,17 +659,16 @@ define float @load_float_seq_cst(float* %fptr) {
define double @load_double_seq_cst(double* %fptr) {
; X86-SSE1-LABEL: load_double_seq_cst:
; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: subl $20, %esp
-; X86-SSE1-NEXT: .cfi_def_cfa_offset 24
+; X86-SSE1-NEXT: subl $12, %esp
+; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: fildll (%eax)
-; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: xorps %xmm0, %xmm0
+; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
-; X86-SSE1-NEXT: addl $20, %esp
+; X86-SSE1-NEXT: addl $12, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;