[llvm] r356078 - [X86] Check for 64-bit mode in X86Subtarget::hasCmpxchg16b()

Craig Topper via llvm-commits llvm-commits@lists.llvm.org
Wed Mar 13 11:48:51 PDT 2019


Author: ctopper
Date: Wed Mar 13 11:48:50 2019
New Revision: 356078

URL: http://llvm.org/viewvc/llvm-project?rev=356078&view=rev
Log:
[X86] Check for 64-bit mode in X86Subtarget::hasCmpxchg16b()

The feature flag alone can't be trusted, since it can be passed via -mattr; we need to ensure 64-bit mode as well.

We had a 64-bit mode check on the instruction to make the assembler work correctly, but we weren't guarding any of our lowering code or the hooks for the AtomicExpandPass.
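
For context, the AtomicExpandPass hook in question is the maximum atomic
width the target claims to support natively. A minimal sketch, assuming the
call in the X86TargetLowering constructor looks roughly like this:

    // With hasCmpxchg16b() now checking is64Bit(), this reports at most 64
    // bits in 32-bit mode even when -mattr=cx16 sets the feature bit, so
    // AtomicExpandPass expands 128-bit atomics to __sync_* libcalls rather
    // than assuming a native CMPXCHG16B is available.
    setMaxAtomicSizeInBitsSupported(Subtarget.hasCmpxchg16b() ? 128 : 64);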

I've added 32-bit command lines to atomic128.ll, with and without cx16. All of the tests there would previously fail if -mattr=cx16 was passed to them. I had to move one f128 test case to a new file, as it seems to have a different 32-bit mode or possibly SSE issue.
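
As a concrete illustration, here is a reduced reproducer in the spirit of the
new 32-bit RUN lines (the function name is illustrative, not taken from the
commit):

    ; Compile with: llc -mtriple=i386-linux-gnu -mattr=cx16 %s
    ; Before this patch the backend believed CMPXCHG16B was usable here;
    ; afterwards it emits a __sync_val_compare_and_swap_16 libcall, as the
    ; updated checks below show.
    define i128 @repro(i128* %p, i128 %old, i128 %new) {
      %pair = cmpxchg i128* %p, i128 %old, i128 %new acquire acquire
      %val = extractvalue { i128, i1 } %pair, 0
      ret i128 %val
    }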

Differential Revision: https://reviews.llvm.org/D59308

Added:
    llvm/trunk/test/CodeGen/X86/atomicf128.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86Subtarget.h
    llvm/trunk/test/CodeGen/X86/atomic128.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=356078&r1=356077&r2=356078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Mar 13 11:48:50 2019
@@ -27209,6 +27209,8 @@ void X86TargetLowering::ReplaceNodeResul
     EVT T = N->getValueType(0);
     assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
     bool Regs64bit = T == MVT::i128;
+    assert((!Regs64bit || Subtarget.hasCmpxchg16b()) &&
+           "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");
     MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
     SDValue cpInL, cpInH;
     cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=356078&r1=356077&r2=356078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed Mar 13 11:48:50 2019
@@ -2094,7 +2094,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
 def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
                     "cmpxchg16b\t$dst", []>,
-                    TB, Requires<[HasCmpxchg16b, In64BitMode]>;
+                    TB, Requires<[HasCmpxchg16b]>;
 } // SchedRW, mayLoad, mayStore, hasSideEffects
 
 

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=356078&r1=356077&r2=356078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Wed Mar 13 11:48:50 2019
@@ -620,7 +620,7 @@ public:
   int getGatherOverhead() const { return GatherOverhead; }
   int getScatterOverhead() const { return ScatterOverhead; }
   bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
-  bool hasCmpxchg16b() const { return HasCmpxchg16b; }
+  bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
   bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }

Modified: llvm/trunk/test/CodeGen/X86/atomic128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic128.ll?rev=356078&r1=356077&r2=356078&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic128.ll Wed Mar 13 11:48:50 2019
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck %s -check-prefixes=CHECK32
+; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=-cx16 | FileCheck %s -check-prefixes=CHECK32
 
 @var = global i128 0
 
@@ -18,6 +20,61 @@ define i128 @val_compare_and_swap(i128*
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: val_compare_and_swap:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %edi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 12
+; CHECK32-NEXT:    subl $20, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -12
+; CHECK32-NEXT:    .cfi_offset %edi, -8
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $44, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
+; CHECK32-NEXT:    movl (%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK32-NEXT:    movl %edi, 8(%esi)
+; CHECK32-NEXT:    movl %edx, 12(%esi)
+; CHECK32-NEXT:    movl %eax, (%esi)
+; CHECK32-NEXT:    movl %ecx, 4(%esi)
+; CHECK32-NEXT:    movl %esi, %eax
+; CHECK32-NEXT:    addl $20, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 12
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %edi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl $4
   %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
   %val = extractvalue { i128, i1 } %pair, 0
   ret i128 %val
@@ -48,6 +105,46 @@ define void @fetch_and_nand(i128* %p, i1
 ; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: fetch_and_nand:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    subl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -8
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_fetch_and_nand_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $28, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl %esi, var+8
+; CHECK32-NEXT:    movl %edx, var+12
+; CHECK32-NEXT:    movl %eax, var
+; CHECK32-NEXT:    movl %ecx, var+4
+; CHECK32-NEXT:    addl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl
   %val = atomicrmw nand i128* %p, i128 %bits release
   store i128 %val, i128* @var, align 16
   ret void
@@ -76,6 +173,46 @@ define void @fetch_and_or(i128* %p, i128
 ; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: fetch_and_or:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    subl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -8
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_fetch_and_or_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $28, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl %esi, var+8
+; CHECK32-NEXT:    movl %edx, var+12
+; CHECK32-NEXT:    movl %eax, var
+; CHECK32-NEXT:    movl %ecx, var+4
+; CHECK32-NEXT:    addl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl
   %val = atomicrmw or i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -104,6 +241,46 @@ define void @fetch_and_add(i128* %p, i12
 ; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: fetch_and_add:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    subl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -8
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_fetch_and_add_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $28, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl %esi, var+8
+; CHECK32-NEXT:    movl %edx, var+12
+; CHECK32-NEXT:    movl %eax, var
+; CHECK32-NEXT:    movl %ecx, var+4
+; CHECK32-NEXT:    addl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl
   %val = atomicrmw add i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -132,6 +309,46 @@ define void @fetch_and_sub(i128* %p, i12
 ; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: fetch_and_sub:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    subl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -8
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_fetch_and_sub_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $28, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl %esi, var+8
+; CHECK32-NEXT:    movl %edx, var+12
+; CHECK32-NEXT:    movl %eax, var
+; CHECK32-NEXT:    movl %ecx, var+4
+; CHECK32-NEXT:    addl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl
   %val = atomicrmw sub i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -163,6 +380,46 @@ define void @fetch_and_min(i128* %p, i12
 ; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: fetch_and_min:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    subl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -8
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_fetch_and_min_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $28, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl %esi, var+8
+; CHECK32-NEXT:    movl %edx, var+12
+; CHECK32-NEXT:    movl %eax, var
+; CHECK32-NEXT:    movl %ecx, var+4
+; CHECK32-NEXT:    addl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl
   %val = atomicrmw min i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -194,6 +451,46 @@ define void @fetch_and_max(i128* %p, i12
 ; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: fetch_and_max:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    subl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -8
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_fetch_and_max_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $28, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl %esi, var+8
+; CHECK32-NEXT:    movl %edx, var+12
+; CHECK32-NEXT:    movl %eax, var
+; CHECK32-NEXT:    movl %ecx, var+4
+; CHECK32-NEXT:    addl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl
   %val = atomicrmw max i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -225,6 +522,46 @@ define void @fetch_and_umin(i128* %p, i1
 ; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: fetch_and_umin:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    subl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -8
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_fetch_and_umin_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $28, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl %esi, var+8
+; CHECK32-NEXT:    movl %edx, var+12
+; CHECK32-NEXT:    movl %eax, var
+; CHECK32-NEXT:    movl %ecx, var+4
+; CHECK32-NEXT:    addl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl
   %val = atomicrmw umin i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -256,6 +593,46 @@ define void @fetch_and_umax(i128* %p, i1
 ; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: fetch_and_umax:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    subl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -8
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_fetch_and_umax_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $28, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl %esi, var+8
+; CHECK32-NEXT:    movl %edx, var+12
+; CHECK32-NEXT:    movl %eax, var
+; CHECK32-NEXT:    movl %ecx, var+4
+; CHECK32-NEXT:    addl $24, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl
   %val = atomicrmw umax i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -274,6 +651,61 @@ define i128 @atomic_load_seq_cst(i128* %
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: atomic_load_seq_cst:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %edi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 12
+; CHECK32-NEXT:    subl $20, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -12
+; CHECK32-NEXT:    .cfi_offset %edi, -8
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $44, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
+; CHECK32-NEXT:    movl (%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK32-NEXT:    movl %edi, 8(%esi)
+; CHECK32-NEXT:    movl %edx, 12(%esi)
+; CHECK32-NEXT:    movl %eax, (%esi)
+; CHECK32-NEXT:    movl %ecx, 4(%esi)
+; CHECK32-NEXT:    movl %esi, %eax
+; CHECK32-NEXT:    addl $20, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 12
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %edi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl $4
    %r = load atomic i128, i128* %p seq_cst, align 16
    ret i128 %r
 }
@@ -291,6 +723,61 @@ define i128 @atomic_load_relaxed(i128* %
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: atomic_load_relaxed:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %edi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 12
+; CHECK32-NEXT:    subl $20, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK32-NEXT:    .cfi_offset %esi, -12
+; CHECK32-NEXT:    .cfi_offset %edi, -8
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl $0
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $44, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
+; CHECK32-NEXT:    movl (%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK32-NEXT:    movl %edi, 8(%esi)
+; CHECK32-NEXT:    movl %edx, 12(%esi)
+; CHECK32-NEXT:    movl %eax, (%esi)
+; CHECK32-NEXT:    movl %ecx, 4(%esi)
+; CHECK32-NEXT:    movl %esi, %eax
+; CHECK32-NEXT:    addl $20, %esp
+; CHECK32-NEXT:    .cfi_def_cfa_offset 12
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %edi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl $4
    %r = load atomic i128, i128* %p monotonic, align 16
    ret i128 %r
 }
@@ -313,6 +800,29 @@ define void @atomic_store_seq_cst(i128*
 ; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: atomic_store_seq_cst:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    subl $36, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $56, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
+; CHECK32-NEXT:    retl
    store atomic i128 %in, i128* %p seq_cst, align 16
    ret void
 }
@@ -335,6 +845,29 @@ define void @atomic_store_release(i128*
 ; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: atomic_store_release:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    subl $36, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $56, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
+; CHECK32-NEXT:    retl
    store atomic i128 %in, i128* %p release, align 16
    ret void
 }
@@ -357,30 +890,31 @@ define void @atomic_store_relaxed(i128*
 ; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
+;
+; CHECK32-LABEL: atomic_store_relaxed:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    subl $36, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
+; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    pushl %eax
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT:    addl $56, %esp
+; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
+; CHECK32-NEXT:    retl
    store atomic i128 %in, i128* %p unordered, align 16
    ret void
 }
 
 
-@fsc128 = external global fp128
-
-define void @atomic_fetch_swapf128(fp128 %x) nounwind {
-; CHECK-LABEL: atomic_fetch_swapf128:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    movq %rsi, %rcx
-; CHECK-NEXT:    movq %rdi, %rbx
-; CHECK-NEXT:    movq _fsc128@{{.*}}(%rip), %rsi
-; CHECK-NEXT:    movq (%rsi), %rax
-; CHECK-NEXT:    movq 8(%rsi), %rdx
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB14_1: ## %atomicrmw.start
-; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    lock cmpxchg16b (%rsi)
-; CHECK-NEXT:    jne LBB14_1
-; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    retq
-  %t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire
-  ret void
-}

Added: llvm/trunk/test/CodeGen/X86/atomicf128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomicf128.ll?rev=356078&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomicf128.ll (added)
+++ llvm/trunk/test/CodeGen/X86/atomicf128.ll Wed Mar 13 11:48:50 2019
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
+
+; FIXME: This test has a fatal error in 32-bit mode
+
+@fsc128 = external global fp128
+
+define void @atomic_fetch_swapf128(fp128 %x) nounwind {
+; CHECK-LABEL: atomic_fetch_swapf128:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movq %rsi, %rcx
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    movq _fsc128@{{.*}}(%rip), %rsi
+; CHECK-NEXT:    movq (%rsi), %rax
+; CHECK-NEXT:    movq 8(%rsi), %rdx
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_1: ## %atomicrmw.start
+; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    lock cmpxchg16b (%rsi)
+; CHECK-NEXT:    jne LBB0_1
+; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+  %t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire
+  ret void
+}
