[llvm] 2a10f80 - [X86] Use FIST for i64 atomic stores on 32-bit targets without SSE.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 23 15:14:22 PST 2020
Author: Craig Topper
Date: 2020-02-23T15:11:38-08:00
New Revision: 2a10f8019dd9525c91d1f2f74538c83edfc16bee
URL: https://github.com/llvm/llvm-project/commit/2a10f8019dd9525c91d1f2f74538c83edfc16bee
DIFF: https://github.com/llvm/llvm-project/commit/2a10f8019dd9525c91d1f2f74538c83edfc16bee.diff
LOG: [X86] Use FIST for i64 atomic stores on 32-bit targets without SSE.
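Previously these stores were expanded to a lock cmpxchg8b loop. With x87 available we can instead spill the i64 to an 8-byte-aligned stack slot, load it with fildll, and store it with fistpll: an aligned 8-byte x87 access is a single memory operation and is architecturally atomic on Pentium and later, the same guarantee the existing lowering already relies on when it uses fildll for i64 atomic loads.

As a sketch (taken from the updated test1 in atomic-load-store-wide.ll below; register assignments and slot offsets are whatever the allocator picks), a sequentially consistent store

    store atomic i64 %val1, i64* %ptr seq_cst, align 8

now lowers without SSE to

    movl %edx, 4(%esp)      # spill the value to an 8-byte-aligned slot
    movl %ecx, (%esp)
    fildll (%esp)           # one 64-bit x87 integer load from the slot
    fistpll (%eax)          # one 64-bit store to %ptr
    lock orl $0, (%esp)     # seq_cst barrier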
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/atomic-fp.ll
llvm/test/CodeGen/X86/atomic-load-store-wide.ll
llvm/test/CodeGen/X86/atomic-mi.ll
llvm/test/CodeGen/X86/atomic-non-integer.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 056fc9b7ff6d..b7ff255f0881 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27518,14 +27518,14 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
return false;
}
-// TODO: In 32-bit mode, use FISTP when X87 is available?
bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
Type *MemType = SI->getValueOperand()->getType();
bool NoImplicitFloatOps =
SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
- !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE1())
+ !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ (Subtarget.hasSSE1() || Subtarget.hasX87()))
return false;
return needsCmpXchgNb(MemType);
@@ -28286,28 +28286,52 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
if (VT == MVT::i64 && !IsTypeLegal) {
// For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
// is enabled.
- // FIXME: Use fist with X87.
bool NoImplicitFloatOps =
DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
- if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- Subtarget.hasSSE1()) {
- SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
- Node->getOperand(2));
- MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
- SclToVec = DAG.getBitcast(StVT, SclToVec);
- SDVTList Tys = DAG.getVTList(MVT::Other);
- SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() };
- SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys,
- Ops, MVT::i64,
- Node->getMemOperand());
-
- // If this is a sequentially consistent store, also emit an appropriate
- // barrier.
- if (IsSeqCst)
- Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
-
- return Chain;
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
+ SDValue Chain;
+ if (Subtarget.hasSSE1()) {
+ SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ Node->getOperand(2));
+ MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
+ SclToVec = DAG.getBitcast(StVT, SclToVec);
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};
+ Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
+ MVT::i64, Node->getMemOperand());
+ } else if (Subtarget.hasX87()) {
+ // First load this into an 80-bit X87 register using a stack temporary.
+ // This will put the whole integer into the significand.
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ Chain =
+ DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
+ MPI, /*Align*/ 0, MachineMemOperand::MOStore);
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue LdOps[] = {Chain, StackPtr};
+ SDValue Value =
+ DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI,
+ /*Align*/ 0, MachineMemOperand::MOLoad);
+ Chain = Value.getValue(1);
+
+ // Now use an FIST to do the atomic store.
+ SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};
+ Chain =
+ DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
+ StoreOps, MVT::i64, Node->getMemOperand());
+ }
+
+ if (Chain) {
+ // If this is a sequentially consistent store, also emit an appropriate
+ // barrier.
+ if (IsSeqCst)
+ Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
+
+ return Chain;
+ }
}
}
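This works because the x87 extended (f80) format carries a 64-bit significand, so FILD of any i64 value is exact and the following FIST writes back the identical bit pattern. Roughly, the nodes built here are (SelectionDAG dump syntax abbreviated):

    t1: ch = store<(store 8 into %stack.0)> Chain, %val, FrameIndex
    t2: f80,ch = X86ISD::FILD<(load 8 from %stack.0)> t1, FrameIndex
    t3: ch = X86ISD::FIST<(store 8 into %ptr)> t2:1, t2, BasePtr

t1 spills the integer to a fresh aligned slot, t2 pulls all 64 bits into the significand, and t3 performs the single 8-byte store, which selects to fistpll in the tests below.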
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index 1f8f64399831..01e0480a815b 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -80,33 +80,25 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
-; X86-NOSSE-NEXT: pushl %ebx
-; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $24, %esp
-; X86-NOSSE-NEXT: movl 8(%ebp), %esi
-; X86-NOSSE-NEXT: fildll (%esi)
+; X86-NOSSE-NEXT: subl $32, %esp
+; X86-NOSSE-NEXT: movl 8(%ebp), %eax
+; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: faddl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl (%esp)
-; X86-NOSSE-NEXT: movl (%esp), %ebx
+; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl (%esi), %eax
-; X86-NOSSE-NEXT: movl 4(%esi), %edx
-; X86-NOSSE-NEXT: .p2align 4, 0x90
-; X86-NOSSE-NEXT: .LBB1_1: # %atomicrmw.start
-; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT: jne .LBB1_1
-; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT: leal -8(%ebp), %esp
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %ecx, (%esp)
+; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
@@ -256,7 +248,6 @@ define void @fadd_64g() nounwind {
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
-; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll glob64
@@ -267,19 +258,14 @@ define void @fadd_64g() nounwind {
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fstpl (%esp)
-; X86-NOSSE-NEXT: movl (%esp), %ebx
+; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl glob64+4, %edx
-; X86-NOSSE-NEXT: movl glob64, %eax
-; X86-NOSSE-NEXT: .p2align 4, 0x90
-; X86-NOSSE-NEXT: .LBB3_1: # %atomicrmw.start
-; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT: lock cmpxchg8b glob64
-; X86-NOSSE-NEXT: jne .LBB3_1
-; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT: leal -4(%ebp), %esp
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll glob64
+; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
@@ -426,7 +412,6 @@ define void @fadd_64imm() nounwind {
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
-; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll -559038737
@@ -437,19 +422,14 @@ define void @fadd_64imm() nounwind {
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fstpl (%esp)
-; X86-NOSSE-NEXT: movl (%esp), %ebx
+; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl -559038737, %eax
-; X86-NOSSE-NEXT: movl -559038733, %edx
-; X86-NOSSE-NEXT: .p2align 4, 0x90
-; X86-NOSSE-NEXT: .LBB5_1: # %atomicrmw.start
-; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT: lock cmpxchg8b -559038737
-; X86-NOSSE-NEXT: jne .LBB5_1
-; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT: leal -4(%ebp), %esp
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll -559038737
+; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
@@ -602,10 +582,9 @@ define void @fadd_64stack() nounwind {
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
-; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $40, %esp
-; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -614,18 +593,13 @@ define void @fadd_64stack() nounwind {
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl (%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: .p2align 4, 0x90
-; X86-NOSSE-NEXT: .LBB7_1: # %atomicrmw.start
-; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT: lock cmpxchg8b (%esp)
-; X86-NOSSE-NEXT: jne .LBB7_1
-; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT: leal -4(%ebp), %esp
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: movl %eax, (%esp)
+; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
@@ -709,36 +683,28 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE: # %bb.0: # %bb
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
-; X86-NOSSE-NEXT: pushl %ebx
-; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
-; X86-NOSSE-NEXT: subl $32, %esp
-; X86-NOSSE-NEXT: movl 20(%ebp), %esi
-; X86-NOSSE-NEXT: movl 8(%ebp), %edi
-; X86-NOSSE-NEXT: fildll (%edi,%esi,8)
+; X86-NOSSE-NEXT: subl $40, %esp
+; X86-NOSSE-NEXT: movl 20(%ebp), %eax
+; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
+; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: faddl 12(%ebp)
-; X86-NOSSE-NEXT: fstpl (%esp)
-; X86-NOSSE-NEXT: movl (%esp), %ebx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl (%edi,%esi,8), %eax
-; X86-NOSSE-NEXT: movl 4(%edi,%esi,8), %edx
-; X86-NOSSE-NEXT: .p2align 4, 0x90
-; X86-NOSSE-NEXT: .LBB8_1: # %atomicrmw.start
-; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8)
-; X86-NOSSE-NEXT: jne .LBB8_1
-; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT: leal -12(%ebp), %esp
+; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOSSE-NEXT: movl %edx, (%esp)
+; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
+; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: popl %edi
-; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
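Dropping the CAS loop also shrinks the prologues throughout this file. lock cmpxchg8b compares %edx:%eax against memory and, on success, stores %ecx:%ebx, so the old expansion had to keep the desired value in %ecx:%ebx and the address in yet another register, forcing saves of %ebx and %esi (and %edi in fadd_array). In sketch form, the pattern being deleted is:

    movl (%esi), %eax           # load the current value
    movl 4(%esi), %edx
    .LBB1_1:
    lock cmpxchg8b (%esi)       # desired value in %ecx:%ebx
    jne .LBB1_1                 # retry if the compare failed

The fildll/fistpll sequence needs only the address register plus the aligned stack slot.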
diff --git a/llvm/test/CodeGen/X86/atomic-load-store-wide.ll b/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
index bdb88564cf84..ebba18b0a315 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -16,27 +16,24 @@ define void @test1(i64* %ptr, i64 %val1) {
;
; NOSSE-LABEL: test1:
; NOSSE: # %bb.0:
-; NOSSE-NEXT: pushl %ebx
-; NOSSE-NEXT: .cfi_def_cfa_offset 8
-; NOSSE-NEXT: pushl %esi
-; NOSSE-NEXT: .cfi_def_cfa_offset 12
-; NOSSE-NEXT: .cfi_offset %esi, -12
-; NOSSE-NEXT: .cfi_offset %ebx, -8
-; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; NOSSE-NEXT: movl (%esi), %eax
-; NOSSE-NEXT: movl 4(%esi), %edx
-; NOSSE-NEXT: .p2align 4, 0x90
-; NOSSE-NEXT: .LBB0_1: # %atomicrmw.start
-; NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
-; NOSSE-NEXT: lock cmpxchg8b (%esi)
-; NOSSE-NEXT: jne .LBB0_1
-; NOSSE-NEXT: # %bb.2: # %atomicrmw.end
-; NOSSE-NEXT: popl %esi
+; NOSSE-NEXT: pushl %ebp
; NOSSE-NEXT: .cfi_def_cfa_offset 8
-; NOSSE-NEXT: popl %ebx
-; NOSSE-NEXT: .cfi_def_cfa_offset 4
+; NOSSE-NEXT: .cfi_offset %ebp, -8
+; NOSSE-NEXT: movl %esp, %ebp
+; NOSSE-NEXT: .cfi_def_cfa_register %ebp
+; NOSSE-NEXT: andl $-8, %esp
+; NOSSE-NEXT: subl $8, %esp
+; NOSSE-NEXT: movl 8(%ebp), %eax
+; NOSSE-NEXT: movl 12(%ebp), %ecx
+; NOSSE-NEXT: movl 16(%ebp), %edx
+; NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; NOSSE-NEXT: movl %ecx, (%esp)
+; NOSSE-NEXT: fildll (%esp)
+; NOSSE-NEXT: fistpll (%eax)
+; NOSSE-NEXT: lock orl $0, (%esp)
+; NOSSE-NEXT: movl %ebp, %esp
+; NOSSE-NEXT: popl %ebp
+; NOSSE-NEXT: .cfi_def_cfa %esp, 4
; NOSSE-NEXT: retl
store atomic i64 %val1, i64* %ptr seq_cst, align 8
ret void
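One detail worth noting: ordering is handled after the store. A seq_cst store still gets a trailing full barrier, emitted by emitLockedStackOp as a locked no-op on the stack slot, while weaker orderings need nothing after the fistpll. Compare test1 above with the monotonic stores in atomic-mi.ll below:

    fistpll (%eax)
    lock orl $0, (%esp)     # seq_cst: full barrier (emitLockedStackOp)

    fistpll (%eax)          # monotonic: no barrier required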
diff --git a/llvm/test/CodeGen/X86/atomic-mi.ll b/llvm/test/CodeGen/X86/atomic-mi.ll
index f660d3311fdb..1c135c0377c6 100644
--- a/llvm/test/CodeGen/X86/atomic-mi.ll
+++ b/llvm/test/CodeGen/X86/atomic-mi.ll
@@ -84,27 +84,21 @@ define void @store_atomic_imm_64(i64* %p) {
;
; X32-LABEL: store_atomic_imm_64:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: movl $42, %ebx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB3_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB3_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: popl %esi
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $42, (%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; These are implemented with a CAS loop on 32 bit architectures, and thus
; cannot be optimized in the same way as the others.
@@ -123,27 +117,21 @@ define void @store_atomic_imm_64_big(i64* %p) {
;
; X32-LABEL: store_atomic_imm_64_big:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: movl $23, %ecx
-; X32-NEXT: movl $1215752192, %ebx # imm = 0x4876E800
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB4_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB4_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: popl %esi
+; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl $23, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $1215752192, (%esp) # imm = 0x4876E800
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
store atomic i64 100000000000, i64* %p monotonic, align 8
ret void
@@ -336,30 +324,20 @@ define void @add_64i(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: addl $2, %ebx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB14_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB14_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl $2, %ecx
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -383,30 +361,20 @@ define void @add_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: addl 12(%ebp), %ebx
-; X32-NEXT: adcl 16(%ebp), %ecx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB15_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB15_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl 12(%ebp), %ecx
+; X32-NEXT: adcl 16(%ebp), %edx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -576,30 +544,20 @@ define void @sub_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: subl 12(%ebp), %ebx
-; X32-NEXT: sbbl 16(%ebp), %ecx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB23_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB23_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: subl 12(%ebp), %ecx
+; X32-NEXT: sbbl 16(%ebp), %edx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -751,29 +709,18 @@ define void @and_64i(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
-; X32-NEXT: andl $2, %ebx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB31_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB31_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl $2, %ecx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -797,30 +744,20 @@ define void @and_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: andl 16(%ebp), %ecx
-; X32-NEXT: andl 12(%ebp), %ebx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB32_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB32_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: andl 16(%ebp), %edx
+; X32-NEXT: andl 12(%ebp), %ecx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -993,29 +930,19 @@ define void @or_64i(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: orl $2, %ebx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB41_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB41_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: orl $2, %ecx
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -1039,30 +966,20 @@ define void @or_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: orl 16(%ebp), %ecx
-; X32-NEXT: orl 12(%ebp), %ebx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB42_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB42_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: orl 16(%ebp), %edx
+; X32-NEXT: orl 12(%ebp), %ecx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -1235,29 +1152,19 @@ define void @xor_64i(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: xorl $2, %ebx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB51_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB51_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: xorl $2, %ecx
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -1281,30 +1188,20 @@ define void @xor_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: xorl 16(%ebp), %ecx
-; X32-NEXT: xorl 12(%ebp), %ebx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB52_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB52_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: xorl 16(%ebp), %edx
+; X32-NEXT: xorl 12(%ebp), %ecx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -1438,30 +1335,20 @@ define void @inc_64(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: addl $1, %ebx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB58_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB58_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl $1, %ecx
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -1586,30 +1473,20 @@ define void @dec_64(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: addl $-1, %ebx
-; X32-NEXT: adcl $-1, %ecx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB63_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB63_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl $-1, %ecx
+; X32-NEXT: adcl $-1, %edx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -1719,30 +1596,20 @@ define void @not_64(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
-; X32-NEXT: movl (%esp), %ebx
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: notl %edx
; X32-NEXT: notl %ecx
-; X32-NEXT: notl %ebx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB68_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB68_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@@ -1844,30 +1711,20 @@ define void @neg_64(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: fildll (%esi)
-; X32-NEXT: fistpll (%esp)
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: fildll (%eax)
+; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: subl (%esp), %ebx
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %edx
; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl (%esi), %eax
-; X32-NEXT: movl 4(%esi), %edx
-; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB73_1: # %atomicrmw.start
-; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB73_1
-; X32-NEXT: # %bb.2: # %atomicrmw.end
-; X32-NEXT: leal -8(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebx
+; X32-NEXT: movl %edx, (%esp)
+; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: fildll (%esp)
+; X32-NEXT: fistpll (%eax)
+; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
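As a sanity check on the immediates in store_atomic_imm_64_big above: the constant 100000000000 is 0x174876E800, so its low dword is 0x4876E800 = 1215752192 and its high dword is 0x17 = 23, matching the two movl stores into the slot:

    100000000000 = 23 * 2^32 + 1215752192

Writing the two halves to the aligned slot and then doing one fildll/fistpll publishes the full 64-bit constant atomically.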
diff --git a/llvm/test/CodeGen/X86/atomic-non-integer.ll b/llvm/test/CodeGen/X86/atomic-non-integer.ll
index e635a59cfdae..8b2ed638af2a 100644
--- a/llvm/test/CodeGen/X86/atomic-non-integer.ll
+++ b/llvm/test/CodeGen/X86/atomic-non-integer.ll
@@ -77,26 +77,16 @@ define void @store_double(double* %fptr, double %v) {
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %ebx
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT: .cfi_offset %esi, -12
-; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT: subl $12, %esp
+; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl (%esi), %eax
-; X86-NOSSE-NEXT: movl 4(%esi), %edx
-; X86-NOSSE-NEXT: .p2align 4, 0x90
-; X86-NOSSE-NEXT: .LBB2_1: # %atomicrmw.start
-; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT: jne .LBB2_1
-; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %ecx, (%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
@@ -576,26 +566,17 @@ define void @store_double_seq_cst(double* %fptr, double %v) {
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %ebx
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT: .cfi_offset %esi, -12
-; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT: subl $12, %esp
+; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl (%esi), %eax
-; X86-NOSSE-NEXT: movl 4(%esi), %edx
-; X86-NOSSE-NEXT: .p2align 4, 0x90
-; X86-NOSSE-NEXT: .LBB9_1: # %atomicrmw.start
-; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT: jne .LBB9_1
-; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: movl %ecx, (%esp)
+; X86-NOSSE-NEXT: fildll (%esp)
+; X86-NOSSE-NEXT: fistpll (%eax)
+; X86-NOSSE-NEXT: lock orl $0, (%esp)
+; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
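The atomic-non-integer.ll changes show the same path reached from a floating-point store: the double's bit pattern is moved through integer registers into the aligned slot, so the lowering never touches an SSE register. The core of the new store_double sequence is (offsets illustrative):

    movl %edx, 4(%esp)      # one half of the double's bits
    movl %ecx, (%esp)       # the other half
    fildll (%esp)
    fistpll (%eax)          # single atomic 8-byte store to %fptr

with lock orl $0, (%esp) appended in the store_double_seq_cst variant, as above.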