[llvm] r358211 - [X86] Use FILD/FIST to implement i64 atomic load on 32-bit targets with X87, but no SSE2

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 11 11:40:21 PDT 2019


Author: ctopper
Date: Thu Apr 11 11:40:21 2019
New Revision: 358211

URL: http://llvm.org/viewvc/llvm-project?rev=358211&view=rev
Log:
[X86] Use FILD/FIST to implement i64 atomic load on 32-bit targets with X87, but no SSE2

If we have X87 but not SSE2, we can atomically load an i64 value into the significand of an 80-bit extended-precision x87 register using fild. We can then use a fist instruction to convert it back to an i64 integer and store it to a stack temporary. From there we can do two 32-bit loads to get the value into integer registers without worrying about atomicity.
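
As a sketch of the resulting codegen, the updated atomic-load-store-wide.ll test below exercises exactly this pattern. Given an i64 atomic load in IR such as:

  define i64 @test2(i64* %ptr) {
    %val = load atomic i64, i64* %ptr seq_cst, align 8
    ret i64 %val
  }

a 32-bit target with X87 but no SSE2 now emits roughly the following sequence (exact stack offsets depend on the final frame layout):

  fildll  (%eax)           # atomic 64-bit load into the x87 significand
  fistpll (%esp)           # non-atomic store of the i64 to a stack temporary
  movl    (%esp), %eax     # reload the low 32 bits
  movl    4(%esp), %edx    # reload the high 32 bits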

This matches what gcc and icc do for this case and removes an existing FIXME.

Differential Revision: https://reviews.llvm.org/D60156

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrFPStack.td
    llvm/trunk/test/CodeGen/X86/atomic-fp.ll
    llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll
    llvm/trunk/test/CodeGen/X86/atomic-mi.ll
    llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll
    llvm/trunk/test/CodeGen/X86/misched_phys_reg_assign_order.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=358211&r1=358210&r2=358211&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Apr 11 11:40:21 2019
@@ -25584,17 +25584,18 @@ bool X86TargetLowering::shouldExpandAtom
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
 // TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
-// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   Type *MemType = LI->getType();
 
   // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
-  // can use movq to do the load.
+  // can use movq to do the load. If we have X87 we can load into an 80-bit
+  // X87 register and store it to a stack temporary.
   bool NoImplicitFloatOps =
       LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
   if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+      (Subtarget.hasSSE2() || Subtarget.hasX87()))
     return AtomicExpansionKind::None;
 
   return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
@@ -27440,23 +27441,57 @@ void X86TargetLowering::ReplaceNodeResul
     bool NoImplicitFloatOps =
         DAG.getMachineFunction().getFunction().hasFnAttribute(
             Attribute::NoImplicitFloat);
-    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-        Subtarget.hasSSE2()) {
+    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
       auto *Node = cast<AtomicSDNode>(N);
-      // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
-      // 64-bits.
-      SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
-      SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
-      SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
-                                           MVT::i64, Node->getMemOperand());
-      SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
-                                DAG.getIntPtrConstant(0, dl));
-      Results.push_back(Res);
-      Results.push_back(Ld.getValue(1));
-      return;
+      if (Subtarget.hasSSE2()) {
+        // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
+        // lower 64-bits.
+        SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+        SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+        SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                             MVT::i64, Node->getMemOperand());
+        SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                  DAG.getIntPtrConstant(0, dl));
+        Results.push_back(Res);
+        Results.push_back(Ld.getValue(1));
+        return;
+      }
+      if (Subtarget.hasX87()) {
+        // First load this into an 80-bit X87 register. This will put the whole
+        // integer into the significand.
+        // FIXME: Do we need to glue? See FIXME comment in BuildFILD.
+        SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
+        SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+        SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
+                                                 dl, Tys, Ops, MVT::i64,
+                                                 Node->getMemOperand());
+        SDValue Chain = Result.getValue(1);
+        SDValue InFlag = Result.getValue(2);
+
+        // Now store the X87 register to a stack temporary and convert to i64.
+        // This store is not atomic and doesn't need to be.
+        // FIXME: We don't need a stack temporary if the result of the load
+        // is already being stored. We could just directly store there.
+        SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+        int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+        MachinePointerInfo MPI =
+            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+        SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
+        Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
+                                        DAG.getVTList(MVT::Other), StoreOps,
+                                        MVT::i64, MPI, 0 /*Align*/,
+                                        MachineMemOperand::MOStore);
+
+        // Finally load the value back from the stack temporary and return it.
+        // This load is not atomic and doesn't need to be.
+        // This load will be further type legalized.
+        Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
+        Results.push_back(Result);
+        Results.push_back(Result.getValue(1));
+        return;
+      }
     }
     // TODO: Use MOVLPS when SSE1 is available?
-    // TODO: Use FILD/FISTP when X87 is available?
     // Delegate to generic TypeLegalization. Situations we can really handle
     // should have already been dealt with by AtomicExpandPass.cpp.
     break;
@@ -27649,6 +27684,7 @@ const char *X86TargetLowering::getTarget
   case X86ISD::FXOR:               return "X86ISD::FXOR";
   case X86ISD::FILD:               return "X86ISD::FILD";
   case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
+  case X86ISD::FIST:               return "X86ISD::FIST";
   case X86ISD::FP_TO_INT_IN_MEM:   return "X86ISD::FP_TO_INT_IN_MEM";
   case X86ISD::FLD:                return "X86ISD::FLD";
   case X86ISD::FST:                return "X86ISD::FST";

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=358211&r1=358210&r2=358211&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Thu Apr 11 11:40:21 2019
@@ -608,16 +608,22 @@ namespace llvm {
       FILD,
       FILD_FLAG,
 
+      /// This instruction implements a fp->int store from FP stack
+      /// slots. This corresponds to the fist instruction. It takes a
+      /// chain operand, value to store, address, and glue. The memory VT
+      /// specifies the type to store as.
+      FIST,
+
       /// This instruction implements an extending load to FP stack slots.
       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
       /// operand, and ptr to load from. The memory VT specifies the type to
       /// load from.
       FLD,
 
-      /// This instruction implements a truncating store to FP stack
+      /// This instruction implements a truncating store from FP stack
       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
-      /// chain operand, value to store, and address. The memory VT specifies
-      /// the type to store as.
+      /// chain operand, value to store, address, and glue. The memory VT
+      /// specifies the type to store as.
       FST,
 
       /// This instruction grabs the address of the next argument

Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=358211&r1=358210&r2=358211&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Thu Apr 11 11:40:21 2019
@@ -21,6 +21,7 @@ def SDTX86Fld       : SDTypeProfile<1, 1
 def SDTX86Fst       : SDTypeProfile<0, 2, [SDTCisFP<0>,
                                            SDTCisPtrTy<1>]>;
 def SDTX86Fild      : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+def SDTX86Fist      : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 def SDTX86Fnstsw    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
 
 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -35,6 +36,9 @@ def X86fild         : SDNode<"X86ISD::FI
 def X86fildflag     : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
                              [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
                               SDNPMemOperand]>;
+def X86fist         : SDNode<"X86ISD::FIST", SDTX86Fist,
+                             [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+                              SDNPMemOperand]>;
 def X86fp_stsw      : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
 def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
                           [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -79,6 +83,11 @@ def X86fildflag64 : PatFrag<(ops node:$p
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
 }]>;
 
+def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
+                        (X86fist node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
 def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
@@ -760,6 +769,10 @@ def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD
 // Used to conv. i64 to f64 since there isn't a SSE version.
 def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
 
+// Used to conv. between f80 and i64 for i64 atomic loads.
+def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
+def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
+
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
 def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,

Modified: llvm/trunk/test/CodeGen/X86/atomic-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-fp.ll?rev=358211&r1=358210&r2=358211&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-fp.ll Thu Apr 11 11:40:21 2019
@@ -77,14 +77,13 @@ define void @fadd_64r(double* %loc, doub
 ; X86-NOSSE-NEXT:    pushl %ebx
 ; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $16, %esp
+; X86-NOSSE-NEXT:    subl $24, %esp
 ; X86-NOSSE-NEXT:    movl 8(%ebp), %esi
-; X86-NOSSE-NEXT:    xorl %eax, %eax
-; X86-NOSSE-NEXT:    xorl %edx, %edx
-; X86-NOSSE-NEXT:    xorl %ecx, %ecx
-; X86-NOSSE-NEXT:    xorl %ebx, %ebx
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esi)
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    faddl 12(%ebp)
@@ -283,13 +282,12 @@ define void @fadd_64g() nounwind {
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    pushl %ebx
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $24, %esp
-; X86-NOSSE-NEXT:    xorl %eax, %eax
-; X86-NOSSE-NEXT:    xorl %edx, %edx
-; X86-NOSSE-NEXT:    xorl %ecx, %ecx
-; X86-NOSSE-NEXT:    xorl %ebx, %ebx
-; X86-NOSSE-NEXT:    lock cmpxchg8b glob64
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    fildll glob64
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fld1
 ; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
@@ -484,13 +482,12 @@ define void @fadd_64imm() nounwind {
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    pushl %ebx
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $24, %esp
-; X86-NOSSE-NEXT:    xorl %eax, %eax
-; X86-NOSSE-NEXT:    xorl %edx, %edx
-; X86-NOSSE-NEXT:    xorl %ecx, %ecx
-; X86-NOSSE-NEXT:    xorl %ebx, %ebx
-; X86-NOSSE-NEXT:    lock cmpxchg8b -559038737
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    fildll -559038737
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fld1
 ; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
@@ -691,13 +688,12 @@ define void @fadd_64stack() nounwind {
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
 ; X86-NOSSE-NEXT:    pushl %ebx
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $32, %esp
-; X86-NOSSE-NEXT:    xorl %eax, %eax
-; X86-NOSSE-NEXT:    xorl %edx, %edx
-; X86-NOSSE-NEXT:    xorl %ecx, %ecx
-; X86-NOSSE-NEXT:    xorl %ebx, %ebx
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%esp)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    subl $40, %esp
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fld1
 ; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
@@ -831,15 +827,14 @@ define void @fadd_array(i64* %arg, doubl
 ; X86-NOSSE-NEXT:    pushl %edi
 ; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $24, %esp
+; X86-NOSSE-NEXT:    subl $32, %esp
 ; X86-NOSSE-NEXT:    movl 20(%ebp), %esi
 ; X86-NOSSE-NEXT:    movl 8(%ebp), %edi
-; X86-NOSSE-NEXT:    xorl %eax, %eax
-; X86-NOSSE-NEXT:    xorl %edx, %edx
-; X86-NOSSE-NEXT:    xorl %ecx, %ecx
-; X86-NOSSE-NEXT:    xorl %ebx, %ebx
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%edi,%esi,8)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%edi,%esi,8)
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    faddl 12(%ebp)

Modified: llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll?rev=358211&r1=358210&r2=358211&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll Thu Apr 11 11:40:21 2019
@@ -45,22 +45,21 @@ define i64 @test2(i64* %ptr) {
 ;
 ; NOSSE-LABEL: test2:
 ; NOSSE:       # %bb.0:
-; NOSSE-NEXT:    pushl %ebx
+; NOSSE-NEXT:    pushl %ebp
 ; NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; NOSSE-NEXT:    pushl %esi
-; NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; NOSSE-NEXT:    .cfi_offset %esi, -12
-; NOSSE-NEXT:    .cfi_offset %ebx, -8
-; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; NOSSE-NEXT:    xorl %eax, %eax
-; NOSSE-NEXT:    xorl %edx, %edx
-; NOSSE-NEXT:    xorl %ecx, %ecx
-; NOSSE-NEXT:    xorl %ebx, %ebx
-; NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; NOSSE-NEXT:    popl %esi
-; NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; NOSSE-NEXT:    popl %ebx
-; NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; NOSSE-NEXT:    .cfi_offset %ebp, -8
+; NOSSE-NEXT:    movl %esp, %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_register %ebp
+; NOSSE-NEXT:    andl $-8, %esp
+; NOSSE-NEXT:    subl $8, %esp
+; NOSSE-NEXT:    movl 8(%ebp), %eax
+; NOSSE-NEXT:    fildll (%eax)
+; NOSSE-NEXT:    fistpll (%esp)
+; NOSSE-NEXT:    movl (%esp), %eax
+; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOSSE-NEXT:    movl %ebp, %esp
+; NOSSE-NEXT:    popl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa %esp, 4
 ; NOSSE-NEXT:    retl
   %val = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %val
@@ -102,22 +101,21 @@ define i64 @test4(i64* %ptr) {
 ;
 ; NOSSE-LABEL: test4:
 ; NOSSE:       # %bb.0:
-; NOSSE-NEXT:    pushl %ebx
-; NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; NOSSE-NEXT:    pushl %esi
-; NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; NOSSE-NEXT:    .cfi_offset %esi, -12
-; NOSSE-NEXT:    .cfi_offset %ebx, -8
-; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; NOSSE-NEXT:    xorl %eax, %eax
-; NOSSE-NEXT:    xorl %edx, %edx
-; NOSSE-NEXT:    xorl %ecx, %ecx
-; NOSSE-NEXT:    xorl %ebx, %ebx
-; NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; NOSSE-NEXT:    popl %esi
+; NOSSE-NEXT:    pushl %ebp
 ; NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; NOSSE-NEXT:    popl %ebx
-; NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; NOSSE-NEXT:    .cfi_offset %ebp, -8
+; NOSSE-NEXT:    movl %esp, %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_register %ebp
+; NOSSE-NEXT:    andl $-8, %esp
+; NOSSE-NEXT:    subl $8, %esp
+; NOSSE-NEXT:    movl 8(%ebp), %eax
+; NOSSE-NEXT:    fildll (%eax)
+; NOSSE-NEXT:    fistpll (%esp)
+; NOSSE-NEXT:    movl (%esp), %eax
+; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOSSE-NEXT:    movl %ebp, %esp
+; NOSSE-NEXT:    popl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa %esp, 4
 ; NOSSE-NEXT:    retl
   %val = load atomic volatile i64, i64* %ptr seq_cst, align 8
   ret i64 %val

Modified: llvm/trunk/test/CodeGen/X86/atomic-mi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-mi.ll?rev=358211&r1=358210&r2=358211&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-mi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-mi.ll Thu Apr 11 11:40:21 2019
@@ -331,20 +331,22 @@ define void @add_64i(i64* %p) {
 ;
 ; X32-LABEL: add_64i:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addl $2, %ebx
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    movl (%esi), %eax
@@ -355,10 +357,11 @@ define void @add_64i(i64* %p) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB14_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'addq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -375,22 +378,24 @@ define void @add_64r(i64* %p, i64 %v) {
 ;
 ; X32-LABEL: add_64r:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addl 12(%ebp), %ebx
+; X32-NEXT:    adcl 16(%ebp), %ecx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
 ; X32-NEXT:    .p2align 4, 0x90
@@ -399,10 +404,11 @@ define void @add_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB15_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'addq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -565,22 +571,24 @@ define void @sub_64r(i64* %p, i64 %v) {
 ;
 ; X32-LABEL: sub_64r:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    subl 12(%ebp), %ebx
+; X32-NEXT:    sbbl 16(%ebp), %ecx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
 ; X32-NEXT:    .p2align 4, 0x90
@@ -589,10 +597,11 @@ define void @sub_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB23_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'subq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -737,19 +746,21 @@ define void @and_64i(i64* %p) {
 ;
 ; X32-LABEL: and_64i:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
 ; X32-NEXT:    andl $2, %ebx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
@@ -760,10 +771,11 @@ define void @and_64i(i64* %p) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB31_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'andq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -780,22 +792,24 @@ define void @and_64r(i64* %p, i64 %v) {
 ;
 ; X32-LABEL: and_64r:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    andl 16(%ebp), %ecx
+; X32-NEXT:    andl 12(%ebp), %ebx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
 ; X32-NEXT:    .p2align 4, 0x90
@@ -804,10 +818,11 @@ define void @and_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB32_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'andq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -973,20 +988,22 @@ define void @or_64i(i64* %p) {
 ;
 ; X32-LABEL: or_64i:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    orl $2, %ebx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
@@ -996,10 +1013,11 @@ define void @or_64i(i64* %p) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB41_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'orq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -1016,22 +1034,24 @@ define void @or_64r(i64* %p, i64 %v) {
 ;
 ; X32-LABEL: or_64r:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    orl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    orl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    orl 16(%ebp), %ecx
+; X32-NEXT:    orl 12(%ebp), %ebx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
 ; X32-NEXT:    .p2align 4, 0x90
@@ -1040,10 +1060,11 @@ define void @or_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB42_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'orq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -1209,20 +1230,22 @@ define void @xor_64i(i64* %p) {
 ;
 ; X32-LABEL: xor_64i:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    xorl $2, %ebx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
@@ -1232,10 +1255,11 @@ define void @xor_64i(i64* %p) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB51_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'xorq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -1252,22 +1276,24 @@ define void @xor_64r(i64* %p, i64 %v) {
 ;
 ; X32-LABEL: xor_64r:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    xorl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    xorl 16(%ebp), %ecx
+; X32-NEXT:    xorl 12(%ebp), %ebx
 ; X32-NEXT:    movl (%esi), %eax
 ; X32-NEXT:    movl 4(%esi), %edx
 ; X32-NEXT:    .p2align 4, 0x90
@@ -1276,10 +1302,11 @@ define void @xor_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB52_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'xorq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -1406,20 +1433,22 @@ define void @inc_64(i64* %p) {
 ;
 ; X32-LABEL: inc_64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addl $1, %ebx
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    movl (%esi), %eax
@@ -1430,10 +1459,11 @@ define void @inc_64(i64* %p) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB58_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;
 ; SLOW_INC-LABEL: inc_64:
@@ -1551,20 +1581,22 @@ define void @dec_64(i64* %p) {
 ;
 ; X32-LABEL: dec_64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addl $-1, %ebx
 ; X32-NEXT:    adcl $-1, %ecx
 ; X32-NEXT:    movl (%esi), %eax
@@ -1575,10 +1607,11 @@ define void @dec_64(i64* %p) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB63_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;
 ; SLOW_INC-LABEL: dec_64:
@@ -1681,20 +1714,22 @@ define void @not_64(i64* %p) {
 ;
 ; X32-LABEL: not_64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    notl %ecx
 ; X32-NEXT:    notl %ebx
 ; X32-NEXT:    movl (%esi), %eax
@@ -1705,10 +1740,11 @@ define void @not_64(i64* %p) {
 ; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB68_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do not check X86-32 as it cannot do 'notq'.
   %1 = load atomic i64, i64* %p acquire, align 8
@@ -1803,40 +1839,37 @@ define void @neg_64(i64* %p) {
 ;
 ; X32-LABEL: neg_64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    pushl %edi
-; X32-NEXT:    .cfi_def_cfa_offset 12
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
 ; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %edi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    xorl %esi, %esi
-; X32-NEXT:    xorl %eax, %eax
-; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    movl 8(%ebp), %esi
+; X32-NEXT:    fildll (%esi)
+; X32-NEXT:    fistpll (%esp)
 ; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    lock cmpxchg8b (%edi)
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    negl %ebx
-; X32-NEXT:    sbbl %edx, %esi
-; X32-NEXT:    movl (%edi), %eax
-; X32-NEXT:    movl 4(%edi), %edx
+; X32-NEXT:    subl (%esp), %ebx
+; X32-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl (%esi), %eax
+; X32-NEXT:    movl 4(%esi), %edx
 ; X32-NEXT:    .p2align 4, 0x90
 ; X32-NEXT:  .LBB73_1: # %atomicrmw.start
 ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    movl %esi, %ecx
-; X32-NEXT:    lock cmpxchg8b (%edi)
+; X32-NEXT:    lock cmpxchg8b (%esi)
 ; X32-NEXT:    jne .LBB73_1
 ; X32-NEXT:  # %bb.2: # %atomicrmw.end
+; X32-NEXT:    leal -8(%ebp), %esp
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    popl %edi
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ;   We do neg check X86-32 as it canneg do 'negq'.
   %1 = load atomic i64, i64* %p acquire, align 8

Modified: llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll?rev=358211&r1=358210&r2=358211&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll Thu Apr 11 11:40:21 2019
@@ -448,28 +448,17 @@ define double @load_double(double* %fptr
 ;
 ; X86-NOSSE-LABEL: load_double:
 ; X86-NOSSE:       # %bb.0:
-; X86-NOSSE-NEXT:    pushl %ebx
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    pushl %esi
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT:    subl $12, %esp
+; X86-NOSSE-NEXT:    subl $20, %esp
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
-; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
-; X86-NOSSE-NEXT:    .cfi_offset %ebx, -8
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT:    xorl %eax, %eax
-; X86-NOSSE-NEXT:    xorl %edx, %edx
-; X86-NOSSE-NEXT:    xorl %ecx, %ecx
-; X86-NOSSE-NEXT:    xorl %ebx, %ebx
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    fildll (%eax)
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %eax, (%esp)
 ; X86-NOSSE-NEXT:    fldl (%esp)
-; X86-NOSSE-NEXT:    addl $12, %esp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT:    popl %esi
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    popl %ebx
+; X86-NOSSE-NEXT:    addl $20, %esp
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NOSSE-NEXT:    retl
 ;
@@ -827,28 +816,17 @@ define double @load_double_seq_cst(doubl
 ;
 ; X86-NOSSE-LABEL: load_double_seq_cst:
 ; X86-NOSSE:       # %bb.0:
-; X86-NOSSE-NEXT:    pushl %ebx
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    pushl %esi
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT:    subl $12, %esp
+; X86-NOSSE-NEXT:    subl $20, %esp
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
-; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
-; X86-NOSSE-NEXT:    .cfi_offset %ebx, -8
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT:    xorl %eax, %eax
-; X86-NOSSE-NEXT:    xorl %edx, %edx
-; X86-NOSSE-NEXT:    xorl %ecx, %ecx
-; X86-NOSSE-NEXT:    xorl %ebx, %ebx
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    fildll (%eax)
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %eax, (%esp)
 ; X86-NOSSE-NEXT:    fldl (%esp)
-; X86-NOSSE-NEXT:    addl $12, %esp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT:    popl %esi
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    popl %ebx
+; X86-NOSSE-NEXT:    addl $20, %esp
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NOSSE-NEXT:    retl
 ;

Modified: llvm/trunk/test/CodeGen/X86/misched_phys_reg_assign_order.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/misched_phys_reg_assign_order.ll?rev=358211&r1=358210&r2=358211&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/misched_phys_reg_assign_order.ll (original)
+++ llvm/trunk/test/CodeGen/X86/misched_phys_reg_assign_order.ll Thu Apr 11 11:40:21 2019
@@ -49,4 +49,4 @@ k.end:
 
 declare i32 @m()
 
-attributes #0 = { "no-frame-pointer-elim-non-leaf" }
+attributes #0 = { noimplicitfloat "no-frame-pointer-elim-non-leaf" }



