[llvm] r356807 - [X86] Use movq for i64 atomic load on 32-bit targets when sse2 is enabled
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 13:46:56 PDT 2019
Author: ctopper
Date: Fri Mar 22 13:46:56 2019
New Revision: 356807
URL: http://llvm.org/viewvc/llvm-project?rev=356807&view=rev
Log:
[X86] Use movq for i64 atomic load on 32-bit targets when sse2 is enabled
We used a lock cmpxchg8b to do i64 atomic loads. But if we have SSE2 we can do better and use a plain movq to do the load instead.
I tried to just use an f64 atomic load and add isel patterns to MOVSD (which the domain fixing pass can turn into MOVQ), but the atomic_load SDNode in TargetSelectionDAG.td requires the type to be integer.
So I've emitted a VZEXT_LOAD instead, which should be selected by isel as a MOVQ. Hopefully we don't need a specific atomic flavor of this. I kept the memory operand from the original AtomicSDNode; I wasn't sure whether I might also need to set the MOVolatile flag.
I've left some FIXMEs for improvements we can do without SSE2.
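For illustration only (not part of the patch): a minimal C++ example, assuming a compiler invocation along the lines of "clang -m32 -msse2 -O2", where the 64-bit seq_cst load is expected to now lower to a single movq instead of a lock cmpxchg8b, matching the SSE42 check lines added to atomic-load-store-wide.ll below.

#include <atomic>
#include <cstdint>

std::atomic<int64_t> Counter;

// A 64-bit seq_cst atomic load. On a 32-bit x86 target with SSE2 this can now
// be lowered to a movq into an XMM register, with the two 32-bit halves then
// extracted into EAX/EDX, rather than using a lock cmpxchg8b.
int64_t readCounter() {
  return Counter.load(std::memory_order_seq_cst);
}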
Differential Revision: https://reviews.llvm.org/D59679
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll
llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=356807&r1=356806&r2=356807&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Mar 22 13:46:56 2019
@@ -485,6 +485,9 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
+ if (!Subtarget.is64Bit())
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+
if (Subtarget.hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
@@ -25494,11 +25497,22 @@ bool X86TargetLowering::shouldExpandAtom
}
// Note: this turns large loads into lock cmpxchg8b/16b.
-// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
+// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
+// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
- return needsCmpXchgNb(LI->getType()) ? AtomicExpansionKind::CmpXChg
- : AtomicExpansionKind::None;
+ Type *MemType = LI->getType();
+
+ // If this is a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
+ // can use movq to do the load.
+ bool NoImplicitFloatOps =
+ LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
+ if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+ !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+ return AtomicExpansionKind::None;
+
+ return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
@@ -27312,6 +27326,32 @@ void X86TargetLowering::ReplaceNodeResul
Results.push_back(EFLAGS.getValue(1));
return;
}
+ case ISD::ATOMIC_LOAD: {
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ Subtarget.hasSSE2()) {
+ auto *Node = cast<AtomicSDNode>(N);
+ // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
+ // 64-bits.
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+ SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ MVT::i64, Node->getMemOperand());
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Ld.getValue(1));
+ return;
+ }
+ // TODO: Use MOVLPS when SSE1 is available?
+ // TODO: Use FILD/FISTP when X87 is available?
+ // Delegate to generic TypeLegalization. Situations we can really handle
+ // should have already been dealt with by AtomicExpandPass.cpp.
+ break;
+ }
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -27323,11 +27363,10 @@ void X86TargetLowering::ReplaceNodeResul
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_LOAD: {
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
- }
+
case ISD::BITCAST: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);
Modified: llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll?rev=356807&r1=356806&r2=356807&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll Fri Mar 22 13:46:56 2019
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mcpu=corei7 -mtriple=i686-- -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=corei7 -mtriple=i686-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=NOSSE
; 64-bit load/store on x86-32
; FIXME: The generated code can be substantially improved.
@@ -34,7 +35,40 @@ define void @test1(i64* %ptr, i64 %val1)
}
define i64 @test2(i64* %ptr) {
-; CHECK-LABEL: test2:
+; SSE42-LABEL: test2:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE42-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE42-NEXT: movd %xmm0, %eax
+; SSE42-NEXT: pextrd $1, %xmm0, %edx
+; SSE42-NEXT: retl
+;
+; NOSSE-LABEL: test2:
+; NOSSE: # %bb.0:
+; NOSSE-NEXT: pushl %ebx
+; NOSSE-NEXT: .cfi_def_cfa_offset 8
+; NOSSE-NEXT: pushl %esi
+; NOSSE-NEXT: .cfi_def_cfa_offset 12
+; NOSSE-NEXT: .cfi_offset %esi, -12
+; NOSSE-NEXT: .cfi_offset %ebx, -8
+; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
+; NOSSE-NEXT: xorl %eax, %eax
+; NOSSE-NEXT: xorl %edx, %edx
+; NOSSE-NEXT: xorl %ecx, %ecx
+; NOSSE-NEXT: xorl %ebx, %ebx
+; NOSSE-NEXT: lock cmpxchg8b (%esi)
+; NOSSE-NEXT: popl %esi
+; NOSSE-NEXT: .cfi_def_cfa_offset 8
+; NOSSE-NEXT: popl %ebx
+; NOSSE-NEXT: .cfi_def_cfa_offset 4
+; NOSSE-NEXT: retl
+ %val = load atomic i64, i64* %ptr seq_cst, align 8
+ ret i64 %val
+}
+
+; Same as test2, but with noimplicitfloat.
+define i64 @test3(i64* %ptr) noimplicitfloat {
+; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: .cfi_def_cfa_offset 8
Modified: llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll?rev=356807&r1=356806&r2=356807&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll Fri Mar 22 13:46:56 2019
@@ -212,30 +212,13 @@ define float @load_float(float* %fptr) {
define double @load_double(double* %fptr) {
; X86-SSE-LABEL: load_double:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %ebx
-; X86-SSE-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: .cfi_def_cfa_offset 12
; X86-SSE-NEXT: subl $12, %esp
-; X86-SSE-NEXT: .cfi_def_cfa_offset 24
-; X86-SSE-NEXT: .cfi_offset %esi, -12
-; X86-SSE-NEXT: .cfi_offset %ebx, -8
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE-NEXT: xorl %eax, %eax
-; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: xorl %ecx, %ecx
-; X86-SSE-NEXT: xorl %ebx, %ebx
-; X86-SSE-NEXT: lock cmpxchg8b (%esi)
-; X86-SSE-NEXT: movd %edx, %xmm0
-; X86-SSE-NEXT: movd %eax, %xmm1
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE-NEXT: movq %xmm1, (%esp)
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movlps %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
-; X86-SSE-NEXT: .cfi_def_cfa_offset 12
-; X86-SSE-NEXT: popl %esi
-; X86-SSE-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
@@ -440,30 +423,13 @@ define float @load_float_seq_cst(float*
define double @load_double_seq_cst(double* %fptr) {
; X86-SSE-LABEL: load_double_seq_cst:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %ebx
-; X86-SSE-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: .cfi_def_cfa_offset 12
; X86-SSE-NEXT: subl $12, %esp
-; X86-SSE-NEXT: .cfi_def_cfa_offset 24
-; X86-SSE-NEXT: .cfi_offset %esi, -12
-; X86-SSE-NEXT: .cfi_offset %ebx, -8
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE-NEXT: xorl %eax, %eax
-; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: xorl %ecx, %ecx
-; X86-SSE-NEXT: xorl %ebx, %ebx
-; X86-SSE-NEXT: lock cmpxchg8b (%esi)
-; X86-SSE-NEXT: movd %edx, %xmm0
-; X86-SSE-NEXT: movd %eax, %xmm1
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE-NEXT: movq %xmm1, (%esp)
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movlps %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
-; X86-SSE-NEXT: .cfi_def_cfa_offset 12
-; X86-SSE-NEXT: popl %esi
-; X86-SSE-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;