[llvm] a5653b8 - [X86] Improve atomics test for !pcsections

Marco Elver via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 27 03:16:57 PST 2023


Author: Marco Elver
Date: 2023-02-27T12:16:13+01:00
New Revision: a5653b82d0b419b4a4c6ec10210e713de4a158a6

URL: https://github.com/llvm/llvm-project/commit/a5653b82d0b419b4a4c6ec10210e713de4a158a6
DIFF: https://github.com/llvm/llvm-project/commit/a5653b82d0b419b4a4c6ec10210e713de4a158a6.diff

LOG: [X86] Improve atomics test for !pcsections

Extend pcsections-atomics.ll to exhaustively test all atomic ops up to
64 bits. This currently shows that some atomic operations do not end up
in PC sections. This will be addressed in a subsequent change.

Differential Revision: https://reviews.llvm.org/D144710
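
Each new test function in the diff below follows the same shape described in the test header: a plain access, the !pcsections-annotated atomic, then another plain access. A minimal sketch of that pattern (not part of this commit; the names @g and %p and the section name in !0 are illustrative) looks like:

@g = dso_local global i64 0, align 8

define i8 @sketch_atomic8_load_monotonic(ptr %p) {
entry:
  ; Non-atomic access before the atomic op; must stay out of the PC section.
  load volatile i64, ptr @g, align 8
  ; The atomic access carrying !pcsections; its PC should be recorded.
  %v = load atomic i8, ptr %p monotonic, align 1, !pcsections !0
  ; Non-atomic access after the atomic op; must also stay out of the PC section.
  store volatile i64 1, ptr @g, align 8
  ret i8 %v
}

!0 = !{!"somesection"}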

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/pcsections-atomics.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/pcsections-atomics.ll b/llvm/test/CodeGen/X86/pcsections-atomics.ll
index 69801621538e..11e02be00442 100644
--- a/llvm/test/CodeGen/X86/pcsections-atomics.ll
+++ b/llvm/test/CodeGen/X86/pcsections-atomics.ll
@@ -1,5 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ;
+; Test that atomic ops in IR marked with !pcsections end up in a PC section in
+; asm. All functions start with a non-atomic access, continue with an atomic
+; access, and end with another non-atomic access; this is to test that the
+; !pcsections propagation doesn't accidentally touch adjacent instructions.
+;
 ; RUN: llc -O0 < %s | FileCheck %s --check-prefixes=O0
 ; RUN: llc -O1 < %s | FileCheck %s --check-prefixes=O1
 ; RUN: llc -O2 < %s | FileCheck %s --check-prefixes=O2
@@ -8,141 +13,9403 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 @foo = dso_local global i64 0, align 8
-@bar = dso_local global i64 0, align 8
 
-define i64 @test_simple_atomic() {
-; O0-LABEL: test_simple_atomic:
+define void @mixed_atomic_non_atomic(ptr %a) {
+; O0-LABEL: mixed_atomic_non_atomic:
 ; O0:       # %bb.0: # %entry
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    incl %eax
+; O0-NEXT:    movl %eax, (%rdi)
 ; O0-NEXT:  .Lpcsection0:
-; O0-NEXT:    movq foo(%rip), %rax
-; O0-NEXT:    addq bar, %rax
+; O0-NEXT:    movl $1, (%rdi)
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    addl $-1, %eax
+; O0-NEXT:    movl %eax, (%rdi)
 ; O0-NEXT:    retq
 ;
-; O1-LABEL: test_simple_atomic:
+; O1-LABEL: mixed_atomic_non_atomic:
 ; O1:       # %bb.0: # %entry
+; O1-NEXT:    incl (%rdi)
 ; O1-NEXT:  .Lpcsection0:
-; O1-NEXT:    movq foo(%rip), %rax
-; O1-NEXT:    addq bar(%rip), %rax
+; O1-NEXT:    movl $1, (%rdi)
+; O1-NEXT:    decl (%rdi)
 ; O1-NEXT:    retq
 ;
-; O2-LABEL: test_simple_atomic:
+; O2-LABEL: mixed_atomic_non_atomic:
 ; O2:       # %bb.0: # %entry
+; O2-NEXT:    incl (%rdi)
 ; O2-NEXT:  .Lpcsection0:
-; O2-NEXT:    movq foo(%rip), %rax
-; O2-NEXT:    addq bar(%rip), %rax
+; O2-NEXT:    movl $1, (%rdi)
+; O2-NEXT:    decl (%rdi)
 ; O2-NEXT:    retq
 ;
-; O3-LABEL: test_simple_atomic:
+; O3-LABEL: mixed_atomic_non_atomic:
 ; O3:       # %bb.0: # %entry
+; O3-NEXT:    incl (%rdi)
 ; O3-NEXT:  .Lpcsection0:
-; O3-NEXT:    movq foo(%rip), %rax
-; O3-NEXT:    addq bar(%rip), %rax
+; O3-NEXT:    movl $1, (%rdi)
+; O3-NEXT:    decl (%rdi)
 ; O3-NEXT:    retq
 entry:
-  %0 = load atomic i64, ptr @foo monotonic, align 8, !pcsections !0
-  %1 = load i64, ptr @bar, align 8
-  %add = add nsw i64 %1, %0
-  ret i64 %add
+  ; Accesses the same location atomically and non-atomically.
+  %0 = load volatile i32, ptr %a, align 4
+  %inc = add nsw i32 %0, 1
+  store volatile i32 %inc, ptr %a, align 4
+  store atomic volatile i32 1, ptr %a monotonic, align 4, !pcsections !0
+  %1 = load volatile i32, ptr %a, align 4
+  %dec = add nsw i32 %1, -1
+  store volatile i32 %dec, ptr %a, align 4
+  ret void
 }
 
-define i64 @test_complex_atomic() {
-; O0-LABEL: test_complex_atomic:
+define i64 @mixed_complex_atomic_non_atomic(ptr %a, ptr %b) {
+; O0-LABEL: mixed_complex_atomic_non_atomic:
 ; O0:       # %bb.0: # %entry
 ; O0-NEXT:    movl $1, %ecx
 ; O0-NEXT:  .Lpcsection1:
-; O0-NEXT:    lock xaddq %rcx, foo(%rip)
-; O0-NEXT:    movq bar, %rax
+; O0-NEXT:    lock xaddq %rcx, (%rdi)
+; O0-NEXT:    movq (%rsi), %rax
 ; O0-NEXT:    movq %rax, %rdx
 ; O0-NEXT:    addq $1, %rdx
-; O0-NEXT:    movq %rdx, bar
+; O0-NEXT:    movq %rdx, (%rsi)
 ; O0-NEXT:    addq %rcx, %rax
 ; O0-NEXT:    retq
 ;
-; O1-LABEL: test_complex_atomic:
+; O1-LABEL: mixed_complex_atomic_non_atomic:
 ; O1:       # %bb.0: # %entry
 ; O1-NEXT:    movl $1, %eax
 ; O1-NEXT:  .Lpcsection1:
-; O1-NEXT:    lock xaddq %rax, foo(%rip)
-; O1-NEXT:    movq bar(%rip), %rcx
+; O1-NEXT:    lock xaddq %rax, (%rdi)
+; O1-NEXT:    movq (%rsi), %rcx
 ; O1-NEXT:    leaq 1(%rcx), %rdx
-; O1-NEXT:    movq %rdx, bar(%rip)
+; O1-NEXT:    movq %rdx, (%rsi)
 ; O1-NEXT:    addq %rcx, %rax
 ; O1-NEXT:    retq
 ;
-; O2-LABEL: test_complex_atomic:
+; O2-LABEL: mixed_complex_atomic_non_atomic:
 ; O2:       # %bb.0: # %entry
 ; O2-NEXT:    movl $1, %eax
 ; O2-NEXT:  .Lpcsection1:
-; O2-NEXT:    lock xaddq %rax, foo(%rip)
-; O2-NEXT:    movq bar(%rip), %rcx
+; O2-NEXT:    lock xaddq %rax, (%rdi)
+; O2-NEXT:    movq (%rsi), %rcx
 ; O2-NEXT:    leaq 1(%rcx), %rdx
-; O2-NEXT:    movq %rdx, bar(%rip)
+; O2-NEXT:    movq %rdx, (%rsi)
 ; O2-NEXT:    addq %rcx, %rax
 ; O2-NEXT:    retq
 ;
-; O3-LABEL: test_complex_atomic:
+; O3-LABEL: mixed_complex_atomic_non_atomic:
 ; O3:       # %bb.0: # %entry
 ; O3-NEXT:    movl $1, %eax
 ; O3-NEXT:  .Lpcsection1:
-; O3-NEXT:    lock xaddq %rax, foo(%rip)
-; O3-NEXT:    movq bar(%rip), %rcx
+; O3-NEXT:    lock xaddq %rax, (%rdi)
+; O3-NEXT:    movq (%rsi), %rcx
 ; O3-NEXT:    leaq 1(%rcx), %rdx
-; O3-NEXT:    movq %rdx, bar(%rip)
+; O3-NEXT:    movq %rdx, (%rsi)
 ; O3-NEXT:    addq %rcx, %rax
 ; O3-NEXT:    retq
 entry:
-  %0 = atomicrmw add ptr @foo, i64 1 monotonic, align 8, !pcsections !0
-  %1 = load i64, ptr @bar, align 8
+  %0 = atomicrmw add ptr %a, i64 1 monotonic, align 8, !pcsections !0
+  %1 = load i64, ptr %b, align 8
   %inc = add nsw i64 %1, 1
-  store i64 %inc, ptr @bar, align 8
+  store i64 %inc, ptr %b, align 8
   %add = add nsw i64 %1, %0
   ret i64 %add
 }
 
-define void @mixed_atomic_non_atomic() {
-; O0-LABEL: mixed_atomic_non_atomic:
+define i8 @atomic8_load_unordered(ptr %a) {
+; O0-LABEL: atomic8_load_unordered:
 ; O0:       # %bb.0: # %entry
-; O0-NEXT:    movl foo(%rip), %eax
-; O0-NEXT:    incl %eax
-; O0-NEXT:    movl %eax, foo(%rip)
+; O0-NEXT:    movq foo(%rip), %rax
 ; O0-NEXT:  .Lpcsection2:
-; O0-NEXT:    movl $1, foo(%rip)
-; O0-NEXT:    movl foo, %eax
-; O0-NEXT:    addl $-1, %eax
-; O0-NEXT:    movl %eax, foo
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movq $1, foo
 ; O0-NEXT:    retq
 ;
-; O1-LABEL: mixed_atomic_non_atomic:
+; O1-LABEL: atomic8_load_unordered:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_load_unordered:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_load_unordered:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i8, ptr %a unordered, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i8 %x
+}
+
+define i8 @atomic8_load_monotonic(ptr %a) {
+; O0-LABEL: atomic8_load_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection3:
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_load_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_load_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_load_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i8, ptr %a monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i8 %x
+}
+
+define i8 @atomic8_load_acquire(ptr %a) {
+; O0-LABEL: atomic8_load_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection4:
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_load_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_load_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_load_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i8, ptr %a acquire, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i8 %x
+}
+
+define i8 @atomic8_load_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_load_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection5:
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_load_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_load_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_load_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i8, ptr %a seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i8 %x
+}
+
+define void @atomic8_store_unordered(ptr %a) {
+; O0-LABEL: atomic8_store_unordered:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection6:
+; O0-NEXT:    movb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_store_unordered:
 ; O1:       # %bb.0: # %entry
-; O1-NEXT:    incl foo(%rip)
+; O1-NEXT:    movq foo(%rip), %rax
 ; O1-NEXT:  .Lpcsection2:
-; O1-NEXT:    movl $1, foo(%rip)
-; O1-NEXT:    decl foo(%rip)
+; O1-NEXT:    movb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
 ; O1-NEXT:    retq
 ;
-; O2-LABEL: mixed_atomic_non_atomic:
+; O2-LABEL: atomic8_store_unordered:
 ; O2:       # %bb.0: # %entry
-; O2-NEXT:    incl foo(%rip)
+; O2-NEXT:    movq foo(%rip), %rax
 ; O2-NEXT:  .Lpcsection2:
-; O2-NEXT:    movl $1, foo(%rip)
-; O2-NEXT:    decl foo(%rip)
+; O2-NEXT:    movb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
 ; O2-NEXT:    retq
 ;
-; O3-LABEL: mixed_atomic_non_atomic:
+; O3-LABEL: atomic8_store_unordered:
 ; O3:       # %bb.0: # %entry
-; O3-NEXT:    incl foo(%rip)
+; O3-NEXT:    movq foo(%rip), %rax
 ; O3-NEXT:  .Lpcsection2:
-; O3-NEXT:    movl $1, foo(%rip)
-; O3-NEXT:    decl foo(%rip)
+; O3-NEXT:    movb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
 entry:
-  %0 = load volatile i32, ptr @foo, align 4
-  %inc = add nsw i32 %0, 1
-  store volatile i32 %inc, ptr @foo, align 4
-  store atomic volatile i32 1, ptr @foo monotonic, align 4, !pcsections !0
-  %1 = load volatile i32, ptr @foo, align 4
-  %dec = add nsw i32 %1, -1
-  store volatile i32 %dec, ptr @foo, align 4
+  load volatile i64, ptr @foo, align 8
+  store atomic i8 42, ptr %a unordered, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_store_monotonic(ptr %a) {
+; O0-LABEL: atomic8_store_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection7:
+; O0-NEXT:    movb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_store_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection3:
+; O1-NEXT:    movb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_store_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection3:
+; O2-NEXT:    movb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_store_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection3:
+; O3-NEXT:    movb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i8 42, ptr %a monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_store_release(ptr %a) {
+; O0-LABEL: atomic8_store_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection8:
+; O0-NEXT:    movb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_store_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection4:
+; O1-NEXT:    movb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_store_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection4:
+; O2-NEXT:    movb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_store_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection4:
+; O3-NEXT:    movb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i8 42, ptr %a release, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_store_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_store_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:  .Lpcsection9:
+; O0-NEXT:    xchgb %al, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_store_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:  .Lpcsection5:
+; O1-NEXT:    xchgb %al, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_store_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:  .Lpcsection5:
+; O2-NEXT:    xchgb %al, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_store_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:  .Lpcsection5:
+; O3-NEXT:    xchgb %al, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i8 42, ptr %a seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xchg_monotonic(ptr %a) {
+; O0-LABEL: atomic8_xchg_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:  .Lpcsection10:
+; O0-NEXT:    xchgb %al, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xchg_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:  .Lpcsection6:
+; O1-NEXT:    xchgb %al, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xchg_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:  .Lpcsection6:
+; O2-NEXT:    xchgb %al, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xchg_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:  .Lpcsection6:
+; O3-NEXT:    xchgb %al, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i8 42 monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_add_monotonic(ptr %a) {
+; O0-LABEL: atomic8_add_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection11:
+; O0-NEXT:    lock addb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_add_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection7:
+; O1-NEXT:    lock addb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_add_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection7:
+; O2-NEXT:    lock addb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_add_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection7:
+; O3-NEXT:    lock addb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i8 42 monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_sub_monotonic(ptr %a) {
+; O0-LABEL: atomic8_sub_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection12:
+; O0-NEXT:    lock subb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_sub_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection8:
+; O1-NEXT:    lock subb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_sub_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection8:
+; O2-NEXT:    lock subb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_sub_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection8:
+; O3-NEXT:    lock subb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i8 42 monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_and_monotonic(ptr %a) {
+; O0-LABEL: atomic8_and_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection13:
+; O0-NEXT:    lock andb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_and_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection9:
+; O1-NEXT:    lock andb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_and_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection9:
+; O2-NEXT:    lock andb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_and_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection9:
+; O3-NEXT:    lock andb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i8 42 monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_or_monotonic(ptr %a) {
+; O0-LABEL: atomic8_or_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection14:
+; O0-NEXT:    lock orb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_or_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection10:
+; O1-NEXT:    lock orb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_or_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection10:
+; O2-NEXT:    lock orb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_or_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection10:
+; O3-NEXT:    lock orb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i8 42 monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xor_monotonic(ptr %a) {
+; O0-LABEL: atomic8_xor_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection15:
+; O0-NEXT:    lock xorb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xor_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection11:
+; O1-NEXT:    lock xorb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xor_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection11:
+; O2-NEXT:    lock xorb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xor_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection11:
+; O3-NEXT:    lock xorb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i8 42 monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_nand_monotonic(ptr %a) {
+; O0-LABEL: atomic8_nand_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection16:
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .LBB16_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movb %al, %dl
+; O0-NEXT:    notb %dl
+; O0-NEXT:  .Lpcsection17:
+; O0-NEXT:    orb $-43, %dl
+; O0-NEXT:    lock cmpxchgb %dl, (%rcx)
+; O0-NEXT:  .Lpcsection18:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection19:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .Lpcsection20:
+; O0-NEXT:    jne .LBB16_2
+; O0-NEXT:    jmp .LBB16_1
+; O0-NEXT:  .LBB16_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_nand_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB16_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notb %cl
+; O1-NEXT:  .Lpcsection12:
+; O1-NEXT:    orb $-43, %cl
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:  .Lpcsection13:
+; O1-NEXT:    jne .LBB16_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_nand_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB16_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notb %cl
+; O2-NEXT:  .Lpcsection12:
+; O2-NEXT:    orb $-43, %cl
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:  .Lpcsection13:
+; O2-NEXT:    jne .LBB16_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_nand_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB16_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notb %cl
+; O3-NEXT:  .Lpcsection12:
+; O3-NEXT:    orb $-43, %cl
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:  .Lpcsection13:
+; O3-NEXT:    jne .LBB16_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i8 42 monotonic, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xchg_acquire(ptr %a) {
+; O0-LABEL: atomic8_xchg_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:  .Lpcsection21:
+; O0-NEXT:    xchgb %al, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xchg_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:  .Lpcsection14:
+; O1-NEXT:    xchgb %al, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xchg_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:  .Lpcsection14:
+; O2-NEXT:    xchgb %al, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xchg_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:  .Lpcsection14:
+; O3-NEXT:    xchgb %al, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i8 42 acquire, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_add_acquire(ptr %a) {
+; O0-LABEL: atomic8_add_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection22:
+; O0-NEXT:    lock addb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_add_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection15:
+; O1-NEXT:    lock addb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_add_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection15:
+; O2-NEXT:    lock addb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_add_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection15:
+; O3-NEXT:    lock addb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i8 42 acquire, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_sub_acquire(ptr %a) {
+; O0-LABEL: atomic8_sub_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection23:
+; O0-NEXT:    lock subb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_sub_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection16:
+; O1-NEXT:    lock subb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_sub_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection16:
+; O2-NEXT:    lock subb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_sub_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection16:
+; O3-NEXT:    lock subb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i8 42 acquire, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_and_acquire(ptr %a) {
+; O0-LABEL: atomic8_and_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection24:
+; O0-NEXT:    lock andb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_and_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection17:
+; O1-NEXT:    lock andb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_and_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection17:
+; O2-NEXT:    lock andb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_and_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection17:
+; O3-NEXT:    lock andb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i8 42 acquire, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_or_acquire(ptr %a) {
+; O0-LABEL: atomic8_or_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection25:
+; O0-NEXT:    lock orb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_or_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection18:
+; O1-NEXT:    lock orb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_or_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection18:
+; O2-NEXT:    lock orb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_or_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection18:
+; O3-NEXT:    lock orb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i8 42 acquire, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xor_acquire(ptr %a) {
+; O0-LABEL: atomic8_xor_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection26:
+; O0-NEXT:    lock xorb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xor_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection19:
+; O1-NEXT:    lock xorb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xor_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection19:
+; O2-NEXT:    lock xorb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xor_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection19:
+; O3-NEXT:    lock xorb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i8 42 acquire, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_nand_acquire(ptr %a) {
+; O0-LABEL: atomic8_nand_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection27:
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .LBB23_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movb %al, %dl
+; O0-NEXT:    notb %dl
+; O0-NEXT:  .Lpcsection28:
+; O0-NEXT:    orb $-43, %dl
+; O0-NEXT:    lock cmpxchgb %dl, (%rcx)
+; O0-NEXT:  .Lpcsection29:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection30:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .Lpcsection31:
+; O0-NEXT:    jne .LBB23_2
+; O0-NEXT:    jmp .LBB23_1
+; O0-NEXT:  .LBB23_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_nand_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB23_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notb %cl
+; O1-NEXT:  .Lpcsection20:
+; O1-NEXT:    orb $-43, %cl
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:  .Lpcsection21:
+; O1-NEXT:    jne .LBB23_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_nand_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB23_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notb %cl
+; O2-NEXT:  .Lpcsection20:
+; O2-NEXT:    orb $-43, %cl
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:  .Lpcsection21:
+; O2-NEXT:    jne .LBB23_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_nand_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB23_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notb %cl
+; O3-NEXT:  .Lpcsection20:
+; O3-NEXT:    orb $-43, %cl
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:  .Lpcsection21:
+; O3-NEXT:    jne .LBB23_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i8 42 acquire, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xchg_release(ptr %a) {
+; O0-LABEL: atomic8_xchg_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:  .Lpcsection32:
+; O0-NEXT:    xchgb %al, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xchg_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:  .Lpcsection22:
+; O1-NEXT:    xchgb %al, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xchg_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:  .Lpcsection22:
+; O2-NEXT:    xchgb %al, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xchg_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:  .Lpcsection22:
+; O3-NEXT:    xchgb %al, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i8 42 release, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_add_release(ptr %a) {
+; O0-LABEL: atomic8_add_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection33:
+; O0-NEXT:    lock addb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_add_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection23:
+; O1-NEXT:    lock addb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_add_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection23:
+; O2-NEXT:    lock addb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_add_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection23:
+; O3-NEXT:    lock addb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i8 42 release, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_sub_release(ptr %a) {
+; O0-LABEL: atomic8_sub_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection34:
+; O0-NEXT:    lock subb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_sub_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection24:
+; O1-NEXT:    lock subb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_sub_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection24:
+; O2-NEXT:    lock subb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_sub_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection24:
+; O3-NEXT:    lock subb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i8 42 release, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_and_release(ptr %a) {
+; O0-LABEL: atomic8_and_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection35:
+; O0-NEXT:    lock andb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_and_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection25:
+; O1-NEXT:    lock andb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_and_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection25:
+; O2-NEXT:    lock andb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_and_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection25:
+; O3-NEXT:    lock andb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i8 42 release, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_or_release(ptr %a) {
+; O0-LABEL: atomic8_or_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection36:
+; O0-NEXT:    lock orb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_or_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection26:
+; O1-NEXT:    lock orb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_or_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection26:
+; O2-NEXT:    lock orb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_or_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection26:
+; O3-NEXT:    lock orb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i8 42 release, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xor_release(ptr %a) {
+; O0-LABEL: atomic8_xor_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection37:
+; O0-NEXT:    lock xorb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xor_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection27:
+; O1-NEXT:    lock xorb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xor_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection27:
+; O2-NEXT:    lock xorb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xor_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection27:
+; O3-NEXT:    lock xorb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i8 42 release, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_nand_release(ptr %a) {
+; O0-LABEL: atomic8_nand_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection38:
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .LBB30_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movb %al, %dl
+; O0-NEXT:    notb %dl
+; O0-NEXT:  .Lpcsection39:
+; O0-NEXT:    orb $-43, %dl
+; O0-NEXT:    lock cmpxchgb %dl, (%rcx)
+; O0-NEXT:  .Lpcsection40:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection41:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .Lpcsection42:
+; O0-NEXT:    jne .LBB30_2
+; O0-NEXT:    jmp .LBB30_1
+; O0-NEXT:  .LBB30_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_nand_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB30_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notb %cl
+; O1-NEXT:  .Lpcsection28:
+; O1-NEXT:    orb $-43, %cl
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:  .Lpcsection29:
+; O1-NEXT:    jne .LBB30_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_nand_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB30_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notb %cl
+; O2-NEXT:  .Lpcsection28:
+; O2-NEXT:    orb $-43, %cl
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:  .Lpcsection29:
+; O2-NEXT:    jne .LBB30_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_nand_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB30_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notb %cl
+; O3-NEXT:  .Lpcsection28:
+; O3-NEXT:    orb $-43, %cl
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:  .Lpcsection29:
+; O3-NEXT:    jne .LBB30_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i8 42 release, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xchg_acq_rel(ptr %a) {
+; O0-LABEL: atomic8_xchg_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:  .Lpcsection43:
+; O0-NEXT:    xchgb %al, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xchg_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:  .Lpcsection30:
+; O1-NEXT:    xchgb %al, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xchg_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:  .Lpcsection30:
+; O2-NEXT:    xchgb %al, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xchg_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:  .Lpcsection30:
+; O3-NEXT:    xchgb %al, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i8 42 acq_rel, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_add_acq_rel(ptr %a) {
+; O0-LABEL: atomic8_add_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection44:
+; O0-NEXT:    lock addb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_add_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection31:
+; O1-NEXT:    lock addb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_add_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection31:
+; O2-NEXT:    lock addb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_add_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection31:
+; O3-NEXT:    lock addb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i8 42 acq_rel, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_sub_acq_rel(ptr %a) {
+; O0-LABEL: atomic8_sub_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection45:
+; O0-NEXT:    lock subb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_sub_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection32:
+; O1-NEXT:    lock subb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_sub_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection32:
+; O2-NEXT:    lock subb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_sub_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection32:
+; O3-NEXT:    lock subb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i8 42 acq_rel, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_and_acq_rel(ptr %a) {
+; O0-LABEL: atomic8_and_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection46:
+; O0-NEXT:    lock andb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_and_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection33:
+; O1-NEXT:    lock andb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_and_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection33:
+; O2-NEXT:    lock andb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_and_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection33:
+; O3-NEXT:    lock andb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i8 42 acq_rel, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_or_acq_rel(ptr %a) {
+; O0-LABEL: atomic8_or_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection47:
+; O0-NEXT:    lock orb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_or_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection34:
+; O1-NEXT:    lock orb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_or_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection34:
+; O2-NEXT:    lock orb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_or_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection34:
+; O3-NEXT:    lock orb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i8 42 acq_rel, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xor_acq_rel(ptr %a) {
+; O0-LABEL: atomic8_xor_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection48:
+; O0-NEXT:    lock xorb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xor_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection35:
+; O1-NEXT:    lock xorb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xor_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection35:
+; O2-NEXT:    lock xorb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xor_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection35:
+; O3-NEXT:    lock xorb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i8 42 acq_rel, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_nand_acq_rel(ptr %a) {
+; O0-LABEL: atomic8_nand_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection49:
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .LBB37_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movb %al, %dl
+; O0-NEXT:    notb %dl
+; O0-NEXT:  .Lpcsection50:
+; O0-NEXT:    orb $-43, %dl
+; O0-NEXT:    lock cmpxchgb %dl, (%rcx)
+; O0-NEXT:  .Lpcsection51:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection52:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .Lpcsection53:
+; O0-NEXT:    jne .LBB37_2
+; O0-NEXT:    jmp .LBB37_1
+; O0-NEXT:  .LBB37_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_nand_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB37_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notb %cl
+; O1-NEXT:  .Lpcsection36:
+; O1-NEXT:    orb $-43, %cl
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:  .Lpcsection37:
+; O1-NEXT:    jne .LBB37_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_nand_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB37_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notb %cl
+; O2-NEXT:  .Lpcsection36:
+; O2-NEXT:    orb $-43, %cl
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:  .Lpcsection37:
+; O2-NEXT:    jne .LBB37_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_nand_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB37_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notb %cl
+; O3-NEXT:  .Lpcsection36:
+; O3-NEXT:    orb $-43, %cl
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:  .Lpcsection37:
+; O3-NEXT:    jne .LBB37_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i8 42 acq_rel, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xchg_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_xchg_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:  .Lpcsection54:
+; O0-NEXT:    xchgb %al, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xchg_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:  .Lpcsection38:
+; O1-NEXT:    xchgb %al, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xchg_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:  .Lpcsection38:
+; O2-NEXT:    xchgb %al, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xchg_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:  .Lpcsection38:
+; O3-NEXT:    xchgb %al, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i8 42 seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_add_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_add_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection55:
+; O0-NEXT:    lock addb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_add_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection39:
+; O1-NEXT:    lock addb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_add_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection39:
+; O2-NEXT:    lock addb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_add_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection39:
+; O3-NEXT:    lock addb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i8 42 seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_sub_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_sub_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection56:
+; O0-NEXT:    lock subb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_sub_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection40:
+; O1-NEXT:    lock subb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_sub_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection40:
+; O2-NEXT:    lock subb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_sub_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection40:
+; O3-NEXT:    lock subb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i8 42 seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_and_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_and_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection57:
+; O0-NEXT:    lock andb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_and_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection41:
+; O1-NEXT:    lock andb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_and_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection41:
+; O2-NEXT:    lock andb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_and_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection41:
+; O3-NEXT:    lock andb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i8 42 seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_or_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_or_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection58:
+; O0-NEXT:    lock orb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_or_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection42:
+; O1-NEXT:    lock orb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_or_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection42:
+; O2-NEXT:    lock orb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_or_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection42:
+; O3-NEXT:    lock orb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i8 42 seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_xor_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_xor_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection59:
+; O0-NEXT:    lock xorb $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_xor_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection43:
+; O1-NEXT:    lock xorb $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_xor_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection43:
+; O2-NEXT:    lock xorb $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_xor_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection43:
+; O3-NEXT:    lock xorb $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i8 42 seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_nand_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_nand_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection60:
+; O0-NEXT:    movb (%rdi), %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .LBB44_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movb %al, %dl
+; O0-NEXT:    notb %dl
+; O0-NEXT:  .Lpcsection61:
+; O0-NEXT:    orb $-43, %dl
+; O0-NEXT:    lock cmpxchgb %dl, (%rcx)
+; O0-NEXT:  .Lpcsection62:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection63:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:  .Lpcsection64:
+; O0-NEXT:    jne .LBB44_2
+; O0-NEXT:    jmp .LBB44_1
+; O0-NEXT:  .LBB44_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_nand_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzbl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB44_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notb %cl
+; O1-NEXT:  .Lpcsection44:
+; O1-NEXT:    orb $-43, %cl
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:  .Lpcsection45:
+; O1-NEXT:    jne .LBB44_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_nand_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzbl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB44_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notb %cl
+; O2-NEXT:  .Lpcsection44:
+; O2-NEXT:    orb $-43, %cl
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:  .Lpcsection45:
+; O2-NEXT:    jne .LBB44_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_nand_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzbl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB44_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notb %cl
+; O3-NEXT:  .Lpcsection44:
+; O3-NEXT:    orb $-43, %cl
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:  .Lpcsection45:
+; O3-NEXT:    jne .LBB44_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i8 42 seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_cas_monotonic(ptr %a) {
+; O0-LABEL: atomic8_cas_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:    movb $1, %cl
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection65:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection66:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_cas_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $1, %cl
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_cas_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $1, %cl
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_cas_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $1, %cl
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i8 42, i8 1 monotonic monotonic, align 1, !pcsections !0
+  %y = cmpxchg ptr %a, i8 42, i8 1 monotonic acquire, align 1, !pcsections !0
+  %z = cmpxchg ptr %a, i8 42, i8 1 monotonic seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_cas_acquire(ptr %a) {
+; O0-LABEL: atomic8_cas_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:    movb $1, %cl
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection67:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection68:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_cas_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $1, %cl
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_cas_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $1, %cl
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_cas_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $1, %cl
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i8 42, i8 1 acquire monotonic, align 1, !pcsections !0
+  %y = cmpxchg ptr %a, i8 42, i8 1 acquire acquire, align 1, !pcsections !0
+  %z = cmpxchg ptr %a, i8 42, i8 1 acquire seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_cas_release(ptr %a) {
+; O0-LABEL: atomic8_cas_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:    movb $1, %cl
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection69:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection70:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_cas_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $1, %cl
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_cas_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $1, %cl
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_cas_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $1, %cl
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i8 42, i8 1 release monotonic, align 1, !pcsections !0
+  %y = cmpxchg ptr %a, i8 42, i8 1 release acquire, align 1, !pcsections !0
+  %z = cmpxchg ptr %a, i8 42, i8 1 release seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_cas_acq_rel(ptr %a) {
+; O0-LABEL: atomic8_cas_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:    movb $1, %cl
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection71:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection72:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_cas_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $1, %cl
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_cas_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $1, %cl
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_cas_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $1, %cl
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i8 42, i8 1 acq_rel monotonic, align 1, !pcsections !0
+  %y = cmpxchg ptr %a, i8 42, i8 1 acq_rel acquire, align 1, !pcsections !0
+  %z = cmpxchg ptr %a, i8 42, i8 1 acq_rel seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic8_cas_seq_cst(ptr %a) {
+; O0-LABEL: atomic8_cas_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movb $42, %al
+; O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; O0-NEXT:    movb $1, %cl
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection73:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:  .Lpcsection74:
+; O0-NEXT:    # kill: def $dl killed $al
+; O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; O0-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic8_cas_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movb $1, %cl
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movb $42, %al
+; O1-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic8_cas_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movb $1, %cl
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movb $42, %al
+; O2-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic8_cas_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movb $1, %cl
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movb $42, %al
+; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i8 42, i8 1 seq_cst monotonic, align 1, !pcsections !0
+  %y = cmpxchg ptr %a, i8 42, i8 1 seq_cst acquire, align 1, !pcsections !0
+  %z = cmpxchg ptr %a, i8 42, i8 1 seq_cst seq_cst, align 1, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define i16 @atomic16_load_unordered(ptr %a) {
+; O0-LABEL: atomic16_load_unordered:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection75:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_load_unordered:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_load_unordered:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_load_unordered:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i16, ptr %a unordered, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i16 %x
+}
+
+define i16 @atomic16_load_monotonic(ptr %a) {
+; O0-LABEL: atomic16_load_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection76:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_load_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_load_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_load_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i16, ptr %a monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i16 %x
+}
+
+define i16 @atomic16_load_acquire(ptr %a) {
+; O0-LABEL: atomic16_load_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection77:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_load_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_load_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_load_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i16, ptr %a acquire, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i16 %x
+}
+
+define i16 @atomic16_load_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_load_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection78:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_load_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_load_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_load_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i16, ptr %a seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i16 %x
+}
+
+define void @atomic16_store_unordered(ptr %a) {
+; O0-LABEL: atomic16_store_unordered:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection79:
+; O0-NEXT:    movw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_store_unordered:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection46:
+; O1-NEXT:    movw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_store_unordered:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection46:
+; O2-NEXT:    movw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_store_unordered:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection46:
+; O3-NEXT:    movw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i16 42, ptr %a unordered, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_store_monotonic(ptr %a) {
+; O0-LABEL: atomic16_store_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection80:
+; O0-NEXT:    movw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_store_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection47:
+; O1-NEXT:    movw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_store_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection47:
+; O2-NEXT:    movw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_store_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection47:
+; O3-NEXT:    movw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i16 42, ptr %a monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_store_release(ptr %a) {
+; O0-LABEL: atomic16_store_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection81:
+; O0-NEXT:    movw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_store_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection48:
+; O1-NEXT:    movw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_store_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection48:
+; O2-NEXT:    movw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_store_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection48:
+; O3-NEXT:    movw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i16 42, ptr %a release, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_store_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_store_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:  .Lpcsection82:
+; O0-NEXT:    xchgw %ax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_store_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:  .Lpcsection49:
+; O1-NEXT:    xchgw %ax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_store_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:  .Lpcsection49:
+; O2-NEXT:    xchgw %ax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_store_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:  .Lpcsection49:
+; O3-NEXT:    xchgw %ax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i16 42, ptr %a seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xchg_monotonic(ptr %a) {
+; O0-LABEL: atomic16_xchg_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:  .Lpcsection83:
+; O0-NEXT:    xchgw %ax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xchg_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:  .Lpcsection50:
+; O1-NEXT:    xchgw %ax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xchg_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:  .Lpcsection50:
+; O2-NEXT:    xchgw %ax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xchg_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:  .Lpcsection50:
+; O3-NEXT:    xchgw %ax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i16 42 monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_add_monotonic(ptr %a) {
+; O0-LABEL: atomic16_add_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection84:
+; O0-NEXT:    lock addw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_add_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection51:
+; O1-NEXT:    lock addw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_add_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection51:
+; O2-NEXT:    lock addw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_add_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection51:
+; O3-NEXT:    lock addw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i16 42 monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_sub_monotonic(ptr %a) {
+; O0-LABEL: atomic16_sub_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection85:
+; O0-NEXT:    lock subw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_sub_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection52:
+; O1-NEXT:    lock subw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_sub_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection52:
+; O2-NEXT:    lock subw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_sub_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection52:
+; O3-NEXT:    lock subw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i16 42 monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_and_monotonic(ptr %a) {
+; O0-LABEL: atomic16_and_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection86:
+; O0-NEXT:    lock andw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_and_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection53:
+; O1-NEXT:    lock andw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_and_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection53:
+; O2-NEXT:    lock andw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_and_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection53:
+; O3-NEXT:    lock andw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i16 42 monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_or_monotonic(ptr %a) {
+; O0-LABEL: atomic16_or_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection87:
+; O0-NEXT:    lock orw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_or_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection54:
+; O1-NEXT:    lock orw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_or_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection54:
+; O2-NEXT:    lock orw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_or_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection54:
+; O3-NEXT:    lock orw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i16 42 monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xor_monotonic(ptr %a) {
+; O0-LABEL: atomic16_xor_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection88:
+; O0-NEXT:    lock xorw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xor_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection55:
+; O1-NEXT:    lock xorw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xor_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection55:
+; O2-NEXT:    lock xorw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xor_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection55:
+; O3-NEXT:    lock xorw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i16 42 monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_nand_monotonic(ptr %a) {
+; O0-LABEL: atomic16_nand_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection89:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .LBB64_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    # implicit-def: $edx
+; O0-NEXT:    movw %ax, %dx
+; O0-NEXT:    notl %edx
+; O0-NEXT:    orl $65493, %edx # imm = 0xFFD5
+; O0-NEXT:  .Lpcsection90:
+; O0-NEXT:    # kill: def $dx killed $dx killed $edx
+; O0-NEXT:    lock cmpxchgw %dx, (%rcx)
+; O0-NEXT:  .Lpcsection91:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection92:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .Lpcsection93:
+; O0-NEXT:    jne .LBB64_2
+; O0-NEXT:    jmp .LBB64_1
+; O0-NEXT:  .LBB64_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_nand_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB64_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O1-NEXT:    # kill: def $ax killed $ax killed $eax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:  .Lpcsection56:
+; O1-NEXT:    # kill: def $ax killed $ax def $eax
+; O1-NEXT:  .Lpcsection57:
+; O1-NEXT:    jne .LBB64_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_nand_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB64_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O2-NEXT:    # kill: def $ax killed $ax killed $eax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:  .Lpcsection56:
+; O2-NEXT:    # kill: def $ax killed $ax def $eax
+; O2-NEXT:  .Lpcsection57:
+; O2-NEXT:    jne .LBB64_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_nand_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB64_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:  .Lpcsection56:
+; O3-NEXT:    # kill: def $ax killed $ax def $eax
+; O3-NEXT:  .Lpcsection57:
+; O3-NEXT:    jne .LBB64_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i16 42 monotonic, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xchg_acquire(ptr %a) {
+; O0-LABEL: atomic16_xchg_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:  .Lpcsection94:
+; O0-NEXT:    xchgw %ax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xchg_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:  .Lpcsection58:
+; O1-NEXT:    xchgw %ax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xchg_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:  .Lpcsection58:
+; O2-NEXT:    xchgw %ax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xchg_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:  .Lpcsection58:
+; O3-NEXT:    xchgw %ax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i16 42 acquire, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_add_acquire(ptr %a) {
+; O0-LABEL: atomic16_add_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection95:
+; O0-NEXT:    lock addw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_add_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection59:
+; O1-NEXT:    lock addw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_add_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection59:
+; O2-NEXT:    lock addw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_add_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection59:
+; O3-NEXT:    lock addw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i16 42 acquire, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_sub_acquire(ptr %a) {
+; O0-LABEL: atomic16_sub_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection96:
+; O0-NEXT:    lock subw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_sub_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection60:
+; O1-NEXT:    lock subw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_sub_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection60:
+; O2-NEXT:    lock subw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_sub_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection60:
+; O3-NEXT:    lock subw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i16 42 acquire, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_and_acquire(ptr %a) {
+; O0-LABEL: atomic16_and_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection97:
+; O0-NEXT:    lock andw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_and_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection61:
+; O1-NEXT:    lock andw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_and_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection61:
+; O2-NEXT:    lock andw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_and_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection61:
+; O3-NEXT:    lock andw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i16 42 acquire, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_or_acquire(ptr %a) {
+; O0-LABEL: atomic16_or_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection98:
+; O0-NEXT:    lock orw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_or_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection62:
+; O1-NEXT:    lock orw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_or_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection62:
+; O2-NEXT:    lock orw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_or_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection62:
+; O3-NEXT:    lock orw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i16 42 acquire, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xor_acquire(ptr %a) {
+; O0-LABEL: atomic16_xor_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection99:
+; O0-NEXT:    lock xorw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xor_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection63:
+; O1-NEXT:    lock xorw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xor_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection63:
+; O2-NEXT:    lock xorw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xor_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection63:
+; O3-NEXT:    lock xorw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i16 42 acquire, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_nand_acquire(ptr %a) {
+; O0-LABEL: atomic16_nand_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection100:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .LBB71_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    # implicit-def: $edx
+; O0-NEXT:    movw %ax, %dx
+; O0-NEXT:    notl %edx
+; O0-NEXT:    orl $65493, %edx # imm = 0xFFD5
+; O0-NEXT:  .Lpcsection101:
+; O0-NEXT:    # kill: def $dx killed $dx killed $edx
+; O0-NEXT:    lock cmpxchgw %dx, (%rcx)
+; O0-NEXT:  .Lpcsection102:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection103:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .Lpcsection104:
+; O0-NEXT:    jne .LBB71_2
+; O0-NEXT:    jmp .LBB71_1
+; O0-NEXT:  .LBB71_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_nand_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB71_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O1-NEXT:    # kill: def $ax killed $ax killed $eax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:  .Lpcsection64:
+; O1-NEXT:    # kill: def $ax killed $ax def $eax
+; O1-NEXT:  .Lpcsection65:
+; O1-NEXT:    jne .LBB71_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_nand_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB71_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O2-NEXT:    # kill: def $ax killed $ax killed $eax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:  .Lpcsection64:
+; O2-NEXT:    # kill: def $ax killed $ax def $eax
+; O2-NEXT:  .Lpcsection65:
+; O2-NEXT:    jne .LBB71_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_nand_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB71_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:  .Lpcsection64:
+; O3-NEXT:    # kill: def $ax killed $ax def $eax
+; O3-NEXT:  .Lpcsection65:
+; O3-NEXT:    jne .LBB71_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i16 42 acquire, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xchg_release(ptr %a) {
+; O0-LABEL: atomic16_xchg_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:  .Lpcsection105:
+; O0-NEXT:    xchgw %ax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xchg_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:  .Lpcsection66:
+; O1-NEXT:    xchgw %ax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xchg_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:  .Lpcsection66:
+; O2-NEXT:    xchgw %ax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xchg_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:  .Lpcsection66:
+; O3-NEXT:    xchgw %ax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i16 42 release, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_add_release(ptr %a) {
+; O0-LABEL: atomic16_add_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection106:
+; O0-NEXT:    lock addw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_add_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection67:
+; O1-NEXT:    lock addw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_add_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection67:
+; O2-NEXT:    lock addw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_add_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection67:
+; O3-NEXT:    lock addw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i16 42 release, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_sub_release(ptr %a) {
+; O0-LABEL: atomic16_sub_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection107:
+; O0-NEXT:    lock subw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_sub_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection68:
+; O1-NEXT:    lock subw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_sub_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection68:
+; O2-NEXT:    lock subw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_sub_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection68:
+; O3-NEXT:    lock subw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i16 42 release, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_and_release(ptr %a) {
+; O0-LABEL: atomic16_and_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection108:
+; O0-NEXT:    lock andw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_and_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection69:
+; O1-NEXT:    lock andw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_and_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection69:
+; O2-NEXT:    lock andw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_and_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection69:
+; O3-NEXT:    lock andw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i16 42 release, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_or_release(ptr %a) {
+; O0-LABEL: atomic16_or_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection109:
+; O0-NEXT:    lock orw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_or_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection70:
+; O1-NEXT:    lock orw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_or_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection70:
+; O2-NEXT:    lock orw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_or_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection70:
+; O3-NEXT:    lock orw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i16 42 release, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xor_release(ptr %a) {
+; O0-LABEL: atomic16_xor_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection110:
+; O0-NEXT:    lock xorw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xor_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection71:
+; O1-NEXT:    lock xorw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xor_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection71:
+; O2-NEXT:    lock xorw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xor_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection71:
+; O3-NEXT:    lock xorw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i16 42 release, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_nand_release(ptr %a) {
+; O0-LABEL: atomic16_nand_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection111:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .LBB78_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    # implicit-def: $edx
+; O0-NEXT:    movw %ax, %dx
+; O0-NEXT:    notl %edx
+; O0-NEXT:    orl $65493, %edx # imm = 0xFFD5
+; O0-NEXT:  .Lpcsection112:
+; O0-NEXT:    # kill: def $dx killed $dx killed $edx
+; O0-NEXT:    lock cmpxchgw %dx, (%rcx)
+; O0-NEXT:  .Lpcsection113:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection114:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .Lpcsection115:
+; O0-NEXT:    jne .LBB78_2
+; O0-NEXT:    jmp .LBB78_1
+; O0-NEXT:  .LBB78_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_nand_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB78_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O1-NEXT:    # kill: def $ax killed $ax killed $eax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:  .Lpcsection72:
+; O1-NEXT:    # kill: def $ax killed $ax def $eax
+; O1-NEXT:  .Lpcsection73:
+; O1-NEXT:    jne .LBB78_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_nand_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB78_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O2-NEXT:    # kill: def $ax killed $ax killed $eax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:  .Lpcsection72:
+; O2-NEXT:    # kill: def $ax killed $ax def $eax
+; O2-NEXT:  .Lpcsection73:
+; O2-NEXT:    jne .LBB78_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_nand_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB78_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:  .Lpcsection72:
+; O3-NEXT:    # kill: def $ax killed $ax def $eax
+; O3-NEXT:  .Lpcsection73:
+; O3-NEXT:    jne .LBB78_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i16 42 release, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xchg_acq_rel(ptr %a) {
+; O0-LABEL: atomic16_xchg_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:  .Lpcsection116:
+; O0-NEXT:    xchgw %ax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xchg_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:  .Lpcsection74:
+; O1-NEXT:    xchgw %ax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xchg_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:  .Lpcsection74:
+; O2-NEXT:    xchgw %ax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xchg_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:  .Lpcsection74:
+; O3-NEXT:    xchgw %ax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i16 42 acq_rel, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_add_acq_rel(ptr %a) {
+; O0-LABEL: atomic16_add_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection117:
+; O0-NEXT:    lock addw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_add_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection75:
+; O1-NEXT:    lock addw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_add_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection75:
+; O2-NEXT:    lock addw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_add_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection75:
+; O3-NEXT:    lock addw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i16 42 acq_rel, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_sub_acq_rel(ptr %a) {
+; O0-LABEL: atomic16_sub_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection118:
+; O0-NEXT:    lock subw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_sub_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection76:
+; O1-NEXT:    lock subw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_sub_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection76:
+; O2-NEXT:    lock subw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_sub_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection76:
+; O3-NEXT:    lock subw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i16 42 acq_rel, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_and_acq_rel(ptr %a) {
+; O0-LABEL: atomic16_and_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection119:
+; O0-NEXT:    lock andw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_and_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection77:
+; O1-NEXT:    lock andw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_and_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection77:
+; O2-NEXT:    lock andw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_and_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection77:
+; O3-NEXT:    lock andw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i16 42 acq_rel, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_or_acq_rel(ptr %a) {
+; O0-LABEL: atomic16_or_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection120:
+; O0-NEXT:    lock orw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_or_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection78:
+; O1-NEXT:    lock orw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_or_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection78:
+; O2-NEXT:    lock orw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_or_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection78:
+; O3-NEXT:    lock orw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i16 42 acq_rel, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xor_acq_rel(ptr %a) {
+; O0-LABEL: atomic16_xor_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection121:
+; O0-NEXT:    lock xorw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xor_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection79:
+; O1-NEXT:    lock xorw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xor_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection79:
+; O2-NEXT:    lock xorw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xor_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection79:
+; O3-NEXT:    lock xorw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i16 42 acq_rel, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_nand_acq_rel(ptr %a) {
+; O0-LABEL: atomic16_nand_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection122:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .LBB85_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    # implicit-def: $edx
+; O0-NEXT:    movw %ax, %dx
+; O0-NEXT:    notl %edx
+; O0-NEXT:    orl $65493, %edx # imm = 0xFFD5
+; O0-NEXT:  .Lpcsection123:
+; O0-NEXT:    # kill: def $dx killed $dx killed $edx
+; O0-NEXT:    lock cmpxchgw %dx, (%rcx)
+; O0-NEXT:  .Lpcsection124:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection125:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .Lpcsection126:
+; O0-NEXT:    jne .LBB85_2
+; O0-NEXT:    jmp .LBB85_1
+; O0-NEXT:  .LBB85_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_nand_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB85_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O1-NEXT:    # kill: def $ax killed $ax killed $eax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:  .Lpcsection80:
+; O1-NEXT:    # kill: def $ax killed $ax def $eax
+; O1-NEXT:  .Lpcsection81:
+; O1-NEXT:    jne .LBB85_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_nand_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB85_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O2-NEXT:    # kill: def $ax killed $ax killed $eax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:  .Lpcsection80:
+; O2-NEXT:    # kill: def $ax killed $ax def $eax
+; O2-NEXT:  .Lpcsection81:
+; O2-NEXT:    jne .LBB85_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_nand_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB85_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:  .Lpcsection80:
+; O3-NEXT:    # kill: def $ax killed $ax def $eax
+; O3-NEXT:  .Lpcsection81:
+; O3-NEXT:    jne .LBB85_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i16 42 acq_rel, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xchg_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_xchg_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:  .Lpcsection127:
+; O0-NEXT:    xchgw %ax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xchg_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:  .Lpcsection82:
+; O1-NEXT:    xchgw %ax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xchg_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:  .Lpcsection82:
+; O2-NEXT:    xchgw %ax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xchg_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:  .Lpcsection82:
+; O3-NEXT:    xchgw %ax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i16 42 seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_add_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_add_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection128:
+; O0-NEXT:    lock addw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_add_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection83:
+; O1-NEXT:    lock addw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_add_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection83:
+; O2-NEXT:    lock addw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_add_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection83:
+; O3-NEXT:    lock addw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i16 42 seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_sub_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_sub_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection129:
+; O0-NEXT:    lock subw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_sub_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection84:
+; O1-NEXT:    lock subw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_sub_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection84:
+; O2-NEXT:    lock subw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_sub_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection84:
+; O3-NEXT:    lock subw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i16 42 seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_and_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_and_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection130:
+; O0-NEXT:    lock andw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_and_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection85:
+; O1-NEXT:    lock andw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_and_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection85:
+; O2-NEXT:    lock andw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_and_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection85:
+; O3-NEXT:    lock andw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i16 42 seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_or_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_or_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection131:
+; O0-NEXT:    lock orw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_or_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection86:
+; O1-NEXT:    lock orw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_or_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection86:
+; O2-NEXT:    lock orw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_or_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection86:
+; O3-NEXT:    lock orw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i16 42 seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_xor_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_xor_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection132:
+; O0-NEXT:    lock xorw $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_xor_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection87:
+; O1-NEXT:    lock xorw $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_xor_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection87:
+; O2-NEXT:    lock xorw $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_xor_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection87:
+; O3-NEXT:    lock xorw $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i16 42 seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_nand_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_nand_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection133:
+; O0-NEXT:    movw (%rdi), %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .LBB92_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    # implicit-def: $edx
+; O0-NEXT:    movw %ax, %dx
+; O0-NEXT:    notl %edx
+; O0-NEXT:    orl $65493, %edx # imm = 0xFFD5
+; O0-NEXT:  .Lpcsection134:
+; O0-NEXT:    # kill: def $dx killed $dx killed $edx
+; O0-NEXT:    lock cmpxchgw %dx, (%rcx)
+; O0-NEXT:  .Lpcsection135:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection136:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:  .Lpcsection137:
+; O0-NEXT:    jne .LBB92_2
+; O0-NEXT:    jmp .LBB92_1
+; O0-NEXT:  .LBB92_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_nand_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movzwl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB92_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O1-NEXT:    # kill: def $ax killed $ax killed $eax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:  .Lpcsection88:
+; O1-NEXT:    # kill: def $ax killed $ax def $eax
+; O1-NEXT:  .Lpcsection89:
+; O1-NEXT:    jne .LBB92_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_nand_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movzwl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB92_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O2-NEXT:    # kill: def $ax killed $ax killed $eax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:  .Lpcsection88:
+; O2-NEXT:    # kill: def $ax killed $ax def $eax
+; O2-NEXT:  .Lpcsection89:
+; O2-NEXT:    jne .LBB92_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_nand_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movzwl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB92_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:  .Lpcsection88:
+; O3-NEXT:    # kill: def $ax killed $ax def $eax
+; O3-NEXT:  .Lpcsection89:
+; O3-NEXT:    jne .LBB92_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i16 42 seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_cas_monotonic(ptr %a) {
+; O0-LABEL: atomic16_cas_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:    movw $1, %cx
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection138:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection139:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_cas_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $1, %cx
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_cas_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $1, %cx
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_cas_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $1, %cx
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i16 42, i16 1 monotonic monotonic, align 2, !pcsections !0
+  %y = cmpxchg ptr %a, i16 42, i16 1 monotonic acquire, align 2, !pcsections !0
+  %z = cmpxchg ptr %a, i16 42, i16 1 monotonic seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_cas_acquire(ptr %a) {
+; O0-LABEL: atomic16_cas_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:    movw $1, %cx
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection140:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection141:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_cas_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $1, %cx
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_cas_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $1, %cx
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_cas_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $1, %cx
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i16 42, i16 1 acquire monotonic, align 2, !pcsections !0
+  %y = cmpxchg ptr %a, i16 42, i16 1 acquire acquire, align 2, !pcsections !0
+  %z = cmpxchg ptr %a, i16 42, i16 1 acquire seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_cas_release(ptr %a) {
+; O0-LABEL: atomic16_cas_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:    movw $1, %cx
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection142:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection143:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_cas_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $1, %cx
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_cas_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $1, %cx
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_cas_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $1, %cx
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i16 42, i16 1 release monotonic, align 2, !pcsections !0
+  %y = cmpxchg ptr %a, i16 42, i16 1 release acquire, align 2, !pcsections !0
+  %z = cmpxchg ptr %a, i16 42, i16 1 release seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_cas_acq_rel(ptr %a) {
+; O0-LABEL: atomic16_cas_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:    movw $1, %cx
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection144:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection145:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_cas_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $1, %cx
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_cas_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $1, %cx
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_cas_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $1, %cx
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i16 42, i16 1 acq_rel monotonic, align 2, !pcsections !0
+  %y = cmpxchg ptr %a, i16 42, i16 1 acq_rel acquire, align 2, !pcsections !0
+  %z = cmpxchg ptr %a, i16 42, i16 1 acq_rel seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic16_cas_seq_cst(ptr %a) {
+; O0-LABEL: atomic16_cas_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movw $42, %ax
+; O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT:    movw $1, %cx
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection146:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:  .Lpcsection147:
+; O0-NEXT:    # kill: def $dx killed $ax
+; O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic16_cas_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movw $1, %cx
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movw $42, %ax
+; O1-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic16_cas_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movw $1, %cx
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movw $42, %ax
+; O2-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic16_cas_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movw $1, %cx
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movw $42, %ax
+; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i16 42, i16 1 seq_cst monotonic, align 2, !pcsections !0
+  %y = cmpxchg ptr %a, i16 42, i16 1 seq_cst acquire, align 2, !pcsections !0
+  %z = cmpxchg ptr %a, i16 42, i16 1 seq_cst seq_cst, align 2, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define i32 @atomic32_load_unordered(ptr %a) {
+; O0-LABEL: atomic32_load_unordered:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection148:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_load_unordered:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection90:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_load_unordered:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection90:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_load_unordered:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection90:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i32, ptr %a unordered, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i32 %x
+}
+
+define i32 @atomic32_load_monotonic(ptr %a) {
+; O0-LABEL: atomic32_load_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection149:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_load_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection91:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_load_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection91:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_load_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection91:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i32, ptr %a monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i32 %x
+}
+
+define i32 @atomic32_load_acquire(ptr %a) {
+; O0-LABEL: atomic32_load_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection150:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_load_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection92:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_load_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection92:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_load_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection92:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i32, ptr %a acquire, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i32 %x
+}
+
+define i32 @atomic32_load_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_load_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection151:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_load_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection93:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_load_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection93:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_load_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection93:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i32, ptr %a seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i32 %x
+}
+
+define void @atomic32_store_unordered(ptr %a) {
+; O0-LABEL: atomic32_store_unordered:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection152:
+; O0-NEXT:    movl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_store_unordered:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection94:
+; O1-NEXT:    movl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_store_unordered:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection94:
+; O2-NEXT:    movl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_store_unordered:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection94:
+; O3-NEXT:    movl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i32 42, ptr %a unordered, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_store_monotonic(ptr %a) {
+; O0-LABEL: atomic32_store_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection153:
+; O0-NEXT:    movl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_store_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection95:
+; O1-NEXT:    movl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_store_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection95:
+; O2-NEXT:    movl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_store_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection95:
+; O3-NEXT:    movl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i32 42, ptr %a monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_store_release(ptr %a) {
+; O0-LABEL: atomic32_store_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection154:
+; O0-NEXT:    movl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_store_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection96:
+; O1-NEXT:    movl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_store_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection96:
+; O2-NEXT:    movl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_store_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection96:
+; O3-NEXT:    movl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i32 42, ptr %a release, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_store_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_store_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection155:
+; O0-NEXT:    xchgl %eax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_store_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection97:
+; O1-NEXT:    xchgl %eax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_store_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection97:
+; O2-NEXT:    xchgl %eax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_store_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection97:
+; O3-NEXT:    xchgl %eax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i32 42, ptr %a seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xchg_monotonic(ptr %a) {
+; O0-LABEL: atomic32_xchg_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection156:
+; O0-NEXT:    xchgl %eax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xchg_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection98:
+; O1-NEXT:    xchgl %eax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xchg_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection98:
+; O2-NEXT:    xchgl %eax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xchg_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection98:
+; O3-NEXT:    xchgl %eax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i32 42 monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_add_monotonic(ptr %a) {
+; O0-LABEL: atomic32_add_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection157:
+; O0-NEXT:    lock addl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_add_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection99:
+; O1-NEXT:    lock addl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_add_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection99:
+; O2-NEXT:    lock addl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_add_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection99:
+; O3-NEXT:    lock addl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i32 42 monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_sub_monotonic(ptr %a) {
+; O0-LABEL: atomic32_sub_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection158:
+; O0-NEXT:    lock subl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_sub_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection100:
+; O1-NEXT:    lock subl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_sub_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection100:
+; O2-NEXT:    lock subl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_sub_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection100:
+; O3-NEXT:    lock subl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i32 42 monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_and_monotonic(ptr %a) {
+; O0-LABEL: atomic32_and_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection159:
+; O0-NEXT:    lock andl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_and_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection101:
+; O1-NEXT:    lock andl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_and_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection101:
+; O2-NEXT:    lock andl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_and_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection101:
+; O3-NEXT:    lock andl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i32 42 monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_or_monotonic(ptr %a) {
+; O0-LABEL: atomic32_or_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection160:
+; O0-NEXT:    lock orl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_or_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection102:
+; O1-NEXT:    lock orl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_or_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection102:
+; O2-NEXT:    lock orl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_or_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection102:
+; O3-NEXT:    lock orl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i32 42 monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xor_monotonic(ptr %a) {
+; O0-LABEL: atomic32_xor_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection161:
+; O0-NEXT:    lock xorl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xor_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection103:
+; O1-NEXT:    lock xorl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xor_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection103:
+; O2-NEXT:    lock xorl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xor_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection103:
+; O3-NEXT:    lock xorl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i32 42 monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_nand_monotonic(ptr %a) {
+; O0-LABEL: atomic32_nand_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection162:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .LBB112_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %edx
+; O0-NEXT:    notl %edx
+; O0-NEXT:  .Lpcsection163:
+; O0-NEXT:    orl $-43, %edx
+; O0-NEXT:    lock cmpxchgl %edx, (%rcx)
+; O0-NEXT:  .Lpcsection164:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection165:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .Lpcsection166:
+; O0-NEXT:    jne .LBB112_2
+; O0-NEXT:    jmp .LBB112_1
+; O0-NEXT:  .LBB112_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_nand_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection104:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB112_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection105:
+; O1-NEXT:    orl $-43, %ecx
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:  .Lpcsection106:
+; O1-NEXT:    jne .LBB112_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_nand_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection104:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB112_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection105:
+; O2-NEXT:    orl $-43, %ecx
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:  .Lpcsection106:
+; O2-NEXT:    jne .LBB112_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_nand_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection104:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB112_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection105:
+; O3-NEXT:    orl $-43, %ecx
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:  .Lpcsection106:
+; O3-NEXT:    jne .LBB112_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i32 42 monotonic, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xchg_acquire(ptr %a) {
+; O0-LABEL: atomic32_xchg_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection167:
+; O0-NEXT:    xchgl %eax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xchg_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection107:
+; O1-NEXT:    xchgl %eax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xchg_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection107:
+; O2-NEXT:    xchgl %eax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xchg_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection107:
+; O3-NEXT:    xchgl %eax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i32 42 acquire, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_add_acquire(ptr %a) {
+; O0-LABEL: atomic32_add_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection168:
+; O0-NEXT:    lock addl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_add_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection108:
+; O1-NEXT:    lock addl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_add_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection108:
+; O2-NEXT:    lock addl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_add_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection108:
+; O3-NEXT:    lock addl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i32 42 acquire, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_sub_acquire(ptr %a) {
+; O0-LABEL: atomic32_sub_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection169:
+; O0-NEXT:    lock subl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_sub_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection109:
+; O1-NEXT:    lock subl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_sub_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection109:
+; O2-NEXT:    lock subl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_sub_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection109:
+; O3-NEXT:    lock subl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i32 42 acquire, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_and_acquire(ptr %a) {
+; O0-LABEL: atomic32_and_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection170:
+; O0-NEXT:    lock andl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_and_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection110:
+; O1-NEXT:    lock andl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_and_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection110:
+; O2-NEXT:    lock andl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_and_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection110:
+; O3-NEXT:    lock andl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i32 42 acquire, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_or_acquire(ptr %a) {
+; O0-LABEL: atomic32_or_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection171:
+; O0-NEXT:    lock orl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_or_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection111:
+; O1-NEXT:    lock orl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_or_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection111:
+; O2-NEXT:    lock orl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_or_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection111:
+; O3-NEXT:    lock orl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i32 42 acquire, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xor_acquire(ptr %a) {
+; O0-LABEL: atomic32_xor_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection172:
+; O0-NEXT:    lock xorl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xor_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection112:
+; O1-NEXT:    lock xorl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xor_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection112:
+; O2-NEXT:    lock xorl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xor_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection112:
+; O3-NEXT:    lock xorl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i32 42 acquire, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_nand_acquire(ptr %a) {
+; O0-LABEL: atomic32_nand_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection173:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .LBB119_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %edx
+; O0-NEXT:    notl %edx
+; O0-NEXT:  .Lpcsection174:
+; O0-NEXT:    orl $-43, %edx
+; O0-NEXT:    lock cmpxchgl %edx, (%rcx)
+; O0-NEXT:  .Lpcsection175:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection176:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .Lpcsection177:
+; O0-NEXT:    jne .LBB119_2
+; O0-NEXT:    jmp .LBB119_1
+; O0-NEXT:  .LBB119_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_nand_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection113:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB119_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection114:
+; O1-NEXT:    orl $-43, %ecx
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:  .Lpcsection115:
+; O1-NEXT:    jne .LBB119_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_nand_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection113:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB119_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection114:
+; O2-NEXT:    orl $-43, %ecx
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:  .Lpcsection115:
+; O2-NEXT:    jne .LBB119_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_nand_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection113:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB119_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection114:
+; O3-NEXT:    orl $-43, %ecx
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:  .Lpcsection115:
+; O3-NEXT:    jne .LBB119_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i32 42 acquire, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xchg_release(ptr %a) {
+; O0-LABEL: atomic32_xchg_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection178:
+; O0-NEXT:    xchgl %eax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xchg_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection116:
+; O1-NEXT:    xchgl %eax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xchg_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection116:
+; O2-NEXT:    xchgl %eax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xchg_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection116:
+; O3-NEXT:    xchgl %eax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i32 42 release, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_add_release(ptr %a) {
+; O0-LABEL: atomic32_add_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection179:
+; O0-NEXT:    lock addl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_add_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection117:
+; O1-NEXT:    lock addl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_add_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection117:
+; O2-NEXT:    lock addl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_add_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection117:
+; O3-NEXT:    lock addl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i32 42 release, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_sub_release(ptr %a) {
+; O0-LABEL: atomic32_sub_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection180:
+; O0-NEXT:    lock subl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_sub_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection118:
+; O1-NEXT:    lock subl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_sub_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection118:
+; O2-NEXT:    lock subl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_sub_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection118:
+; O3-NEXT:    lock subl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i32 42 release, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_and_release(ptr %a) {
+; O0-LABEL: atomic32_and_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection181:
+; O0-NEXT:    lock andl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_and_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection119:
+; O1-NEXT:    lock andl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_and_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection119:
+; O2-NEXT:    lock andl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_and_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection119:
+; O3-NEXT:    lock andl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i32 42 release, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_or_release(ptr %a) {
+; O0-LABEL: atomic32_or_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection182:
+; O0-NEXT:    lock orl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_or_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection120:
+; O1-NEXT:    lock orl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_or_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection120:
+; O2-NEXT:    lock orl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_or_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection120:
+; O3-NEXT:    lock orl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i32 42 release, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xor_release(ptr %a) {
+; O0-LABEL: atomic32_xor_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection183:
+; O0-NEXT:    lock xorl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xor_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection121:
+; O1-NEXT:    lock xorl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xor_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection121:
+; O2-NEXT:    lock xorl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xor_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection121:
+; O3-NEXT:    lock xorl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i32 42 release, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_nand_release(ptr %a) {
+; O0-LABEL: atomic32_nand_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection184:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .LBB126_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %edx
+; O0-NEXT:    notl %edx
+; O0-NEXT:  .Lpcsection185:
+; O0-NEXT:    orl $-43, %edx
+; O0-NEXT:    lock cmpxchgl %edx, (%rcx)
+; O0-NEXT:  .Lpcsection186:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection187:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .Lpcsection188:
+; O0-NEXT:    jne .LBB126_2
+; O0-NEXT:    jmp .LBB126_1
+; O0-NEXT:  .LBB126_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_nand_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection122:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB126_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection123:
+; O1-NEXT:    orl $-43, %ecx
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:  .Lpcsection124:
+; O1-NEXT:    jne .LBB126_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_nand_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection122:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB126_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection123:
+; O2-NEXT:    orl $-43, %ecx
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:  .Lpcsection124:
+; O2-NEXT:    jne .LBB126_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_nand_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection122:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB126_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection123:
+; O3-NEXT:    orl $-43, %ecx
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:  .Lpcsection124:
+; O3-NEXT:    jne .LBB126_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i32 42 release, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xchg_acq_rel(ptr %a) {
+; O0-LABEL: atomic32_xchg_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection189:
+; O0-NEXT:    xchgl %eax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xchg_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection125:
+; O1-NEXT:    xchgl %eax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xchg_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection125:
+; O2-NEXT:    xchgl %eax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xchg_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection125:
+; O3-NEXT:    xchgl %eax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i32 42 acq_rel, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_add_acq_rel(ptr %a) {
+; O0-LABEL: atomic32_add_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection190:
+; O0-NEXT:    lock addl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_add_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection126:
+; O1-NEXT:    lock addl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_add_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection126:
+; O2-NEXT:    lock addl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_add_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection126:
+; O3-NEXT:    lock addl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i32 42 acq_rel, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_sub_acq_rel(ptr %a) {
+; O0-LABEL: atomic32_sub_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection191:
+; O0-NEXT:    lock subl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_sub_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection127:
+; O1-NEXT:    lock subl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_sub_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection127:
+; O2-NEXT:    lock subl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_sub_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection127:
+; O3-NEXT:    lock subl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i32 42 acq_rel, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_and_acq_rel(ptr %a) {
+; O0-LABEL: atomic32_and_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection192:
+; O0-NEXT:    lock andl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_and_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection128:
+; O1-NEXT:    lock andl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_and_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection128:
+; O2-NEXT:    lock andl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_and_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection128:
+; O3-NEXT:    lock andl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i32 42 acq_rel, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_or_acq_rel(ptr %a) {
+; O0-LABEL: atomic32_or_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection193:
+; O0-NEXT:    lock orl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_or_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection129:
+; O1-NEXT:    lock orl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_or_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection129:
+; O2-NEXT:    lock orl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_or_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection129:
+; O3-NEXT:    lock orl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i32 42 acq_rel, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xor_acq_rel(ptr %a) {
+; O0-LABEL: atomic32_xor_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection194:
+; O0-NEXT:    lock xorl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xor_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection130:
+; O1-NEXT:    lock xorl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xor_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection130:
+; O2-NEXT:    lock xorl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xor_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection130:
+; O3-NEXT:    lock xorl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i32 42 acq_rel, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_nand_acq_rel(ptr %a) {
+; O0-LABEL: atomic32_nand_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection195:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .LBB133_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %edx
+; O0-NEXT:    notl %edx
+; O0-NEXT:  .Lpcsection196:
+; O0-NEXT:    orl $-43, %edx
+; O0-NEXT:    lock cmpxchgl %edx, (%rcx)
+; O0-NEXT:  .Lpcsection197:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection198:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .Lpcsection199:
+; O0-NEXT:    jne .LBB133_2
+; O0-NEXT:    jmp .LBB133_1
+; O0-NEXT:  .LBB133_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_nand_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection131:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB133_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection132:
+; O1-NEXT:    orl $-43, %ecx
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:  .Lpcsection133:
+; O1-NEXT:    jne .LBB133_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_nand_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection131:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB133_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection132:
+; O2-NEXT:    orl $-43, %ecx
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:  .Lpcsection133:
+; O2-NEXT:    jne .LBB133_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_nand_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection131:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB133_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection132:
+; O3-NEXT:    orl $-43, %ecx
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:  .Lpcsection133:
+; O3-NEXT:    jne .LBB133_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i32 42 acq_rel, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xchg_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_xchg_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection200:
+; O0-NEXT:    xchgl %eax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xchg_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection134:
+; O1-NEXT:    xchgl %eax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xchg_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection134:
+; O2-NEXT:    xchgl %eax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xchg_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection134:
+; O3-NEXT:    xchgl %eax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i32 42 seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_add_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_add_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection201:
+; O0-NEXT:    lock addl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_add_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection135:
+; O1-NEXT:    lock addl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_add_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection135:
+; O2-NEXT:    lock addl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_add_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection135:
+; O3-NEXT:    lock addl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i32 42 seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_sub_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_sub_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection202:
+; O0-NEXT:    lock subl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_sub_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection136:
+; O1-NEXT:    lock subl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_sub_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection136:
+; O2-NEXT:    lock subl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_sub_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection136:
+; O3-NEXT:    lock subl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i32 42 seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_and_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_and_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection203:
+; O0-NEXT:    lock andl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_and_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection137:
+; O1-NEXT:    lock andl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_and_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection137:
+; O2-NEXT:    lock andl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_and_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection137:
+; O3-NEXT:    lock andl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i32 42 seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_or_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_or_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection204:
+; O0-NEXT:    lock orl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_or_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection138:
+; O1-NEXT:    lock orl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_or_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection138:
+; O2-NEXT:    lock orl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_or_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection138:
+; O3-NEXT:    lock orl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i32 42 seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_xor_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_xor_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection205:
+; O0-NEXT:    lock xorl $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_xor_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection139:
+; O1-NEXT:    lock xorl $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_xor_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection139:
+; O2-NEXT:    lock xorl $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_xor_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection139:
+; O3-NEXT:    lock xorl $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i32 42 seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_nand_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_nand_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection206:
+; O0-NEXT:    movl (%rdi), %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .LBB140_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %edx
+; O0-NEXT:    notl %edx
+; O0-NEXT:  .Lpcsection207:
+; O0-NEXT:    orl $-43, %edx
+; O0-NEXT:    lock cmpxchgl %edx, (%rcx)
+; O0-NEXT:  .Lpcsection208:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection209:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:  .Lpcsection210:
+; O0-NEXT:    jne .LBB140_2
+; O0-NEXT:    jmp .LBB140_1
+; O0-NEXT:  .LBB140_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_nand_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection140:
+; O1-NEXT:    movl (%rdi), %eax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB140_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection141:
+; O1-NEXT:    orl $-43, %ecx
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:  .Lpcsection142:
+; O1-NEXT:    jne .LBB140_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_nand_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection140:
+; O2-NEXT:    movl (%rdi), %eax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB140_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection141:
+; O2-NEXT:    orl $-43, %ecx
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:  .Lpcsection142:
+; O2-NEXT:    jne .LBB140_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_nand_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection140:
+; O3-NEXT:    movl (%rdi), %eax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB140_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection141:
+; O3-NEXT:    orl $-43, %ecx
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:  .Lpcsection142:
+; O3-NEXT:    jne .LBB140_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i32 42 seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_cas_monotonic(ptr %a) {
+; O0-LABEL: atomic32_cas_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection211:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection212:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_cas_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_cas_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_cas_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i32 42, i32 1 monotonic monotonic, align 4, !pcsections !0
+  %y = cmpxchg ptr %a, i32 42, i32 1 monotonic acquire, align 4, !pcsections !0
+  %z = cmpxchg ptr %a, i32 42, i32 1 monotonic seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_cas_acquire(ptr %a) {
+; O0-LABEL: atomic32_cas_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection213:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection214:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_cas_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_cas_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_cas_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i32 42, i32 1 acquire monotonic, align 4, !pcsections !0
+  %y = cmpxchg ptr %a, i32 42, i32 1 acquire acquire, align 4, !pcsections !0
+  %z = cmpxchg ptr %a, i32 42, i32 1 acquire seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_cas_release(ptr %a) {
+; O0-LABEL: atomic32_cas_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection215:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection216:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_cas_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_cas_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_cas_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i32 42, i32 1 release monotonic, align 4, !pcsections !0
+  %y = cmpxchg ptr %a, i32 42, i32 1 release acquire, align 4, !pcsections !0
+  %z = cmpxchg ptr %a, i32 42, i32 1 release seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_cas_acq_rel(ptr %a) {
+; O0-LABEL: atomic32_cas_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection217:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection218:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_cas_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_cas_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_cas_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i32 42, i32 1 acq_rel monotonic, align 4, !pcsections !0
+  %y = cmpxchg ptr %a, i32 42, i32 1 acq_rel acquire, align 4, !pcsections !0
+  %z = cmpxchg ptr %a, i32 42, i32 1 acq_rel seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic32_cas_seq_cst(ptr %a) {
+; O0-LABEL: atomic32_cas_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection219:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:  .Lpcsection220:
+; O0-NEXT:    # kill: def $edx killed $eax
+; O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic32_cas_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic32_cas_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic32_cas_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i32 42, i32 1 seq_cst monotonic, align 4, !pcsections !0
+  %y = cmpxchg ptr %a, i32 42, i32 1 seq_cst acquire, align 4, !pcsections !0
+  %z = cmpxchg ptr %a, i32 42, i32 1 seq_cst seq_cst, align 4, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define i64 @atomic64_load_unordered(ptr %a) {
+; O0-LABEL: atomic64_load_unordered:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection221:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_load_unordered:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection143:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_load_unordered:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection143:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_load_unordered:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection143:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i64, ptr %a unordered, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i64 %x
+}
+
+define i64 @atomic64_load_monotonic(ptr %a) {
+; O0-LABEL: atomic64_load_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection222:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_load_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection144:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_load_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection144:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_load_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection144:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i64, ptr %a monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i64 %x
+}
+
+define i64 @atomic64_load_acquire(ptr %a) {
+; O0-LABEL: atomic64_load_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection223:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_load_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection145:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_load_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection145:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_load_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection145:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i64, ptr %a acquire, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i64 %x
+}
+
+define i64 @atomic64_load_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_load_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection224:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_load_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection146:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_load_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection146:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_load_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection146:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic i64, ptr %a seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret i64 %x
+}
+
+define ptr @atomic64_load_seq_cst_ptr_ty(ptr %a) {
+; O0-LABEL: atomic64_load_seq_cst_ptr_ty:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection225:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_load_seq_cst_ptr_ty:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection147:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_load_seq_cst_ptr_ty:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection147:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_load_seq_cst_ptr_ty:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection147:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = load atomic ptr, ptr %a seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret ptr %x
+}
+
+define void @atomic64_store_unordered(ptr %a) {
+; O0-LABEL: atomic64_store_unordered:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection226:
+; O0-NEXT:    movq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_store_unordered:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection148:
+; O1-NEXT:    movq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_store_unordered:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection148:
+; O2-NEXT:    movq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_store_unordered:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection148:
+; O3-NEXT:    movq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i64 42, ptr %a unordered, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_store_monotonic(ptr %a) {
+; O0-LABEL: atomic64_store_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection227:
+; O0-NEXT:    movq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_store_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection149:
+; O1-NEXT:    movq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_store_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection149:
+; O2-NEXT:    movq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_store_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection149:
+; O3-NEXT:    movq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i64 42, ptr %a monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_store_release(ptr %a) {
+; O0-LABEL: atomic64_store_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection228:
+; O0-NEXT:    movq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_store_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection150:
+; O1-NEXT:    movq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_store_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection150:
+; O2-NEXT:    movq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_store_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection150:
+; O3-NEXT:    movq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i64 42, ptr %a release, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_store_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_store_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection229:
+; O0-NEXT:    xchgq %rax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_store_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection151:
+; O1-NEXT:    xchgq %rax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_store_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection151:
+; O2-NEXT:    xchgq %rax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_store_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection151:
+; O3-NEXT:    xchgq %rax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic i64 42, ptr %a seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_store_seq_cst_ptr_ty(ptr %a, ptr %v) {
+; O0-LABEL: atomic64_store_seq_cst_ptr_ty:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection230:
+; O0-NEXT:    xchgq %rsi, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_store_seq_cst_ptr_ty:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection152:
+; O1-NEXT:    xchgq %rsi, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_store_seq_cst_ptr_ty:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection152:
+; O2-NEXT:    xchgq %rsi, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_store_seq_cst_ptr_ty:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection152:
+; O3-NEXT:    xchgq %rsi, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  store atomic ptr %v, ptr %a seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xchg_monotonic(ptr %a) {
+; O0-LABEL: atomic64_xchg_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection231:
+; O0-NEXT:    xchgq %rax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xchg_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection153:
+; O1-NEXT:    xchgq %rax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xchg_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection153:
+; O2-NEXT:    xchgq %rax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xchg_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection153:
+; O3-NEXT:    xchgq %rax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i64 42 monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_add_monotonic(ptr %a) {
+; O0-LABEL: atomic64_add_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection232:
+; O0-NEXT:    lock addq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_add_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection154:
+; O1-NEXT:    lock addq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_add_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection154:
+; O2-NEXT:    lock addq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_add_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection154:
+; O3-NEXT:    lock addq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i64 42 monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_sub_monotonic(ptr %a) {
+; O0-LABEL: atomic64_sub_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection233:
+; O0-NEXT:    lock subq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_sub_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection155:
+; O1-NEXT:    lock subq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_sub_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection155:
+; O2-NEXT:    lock subq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_sub_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection155:
+; O3-NEXT:    lock subq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i64 42 monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_and_monotonic(ptr %a) {
+; O0-LABEL: atomic64_and_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection234:
+; O0-NEXT:    lock andq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_and_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection156:
+; O1-NEXT:    lock andq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_and_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection156:
+; O2-NEXT:    lock andq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_and_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection156:
+; O3-NEXT:    lock andq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i64 42 monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_or_monotonic(ptr %a) {
+; O0-LABEL: atomic64_or_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection235:
+; O0-NEXT:    lock orq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_or_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection157:
+; O1-NEXT:    lock orq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_or_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection157:
+; O2-NEXT:    lock orq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_or_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection157:
+; O3-NEXT:    lock orq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i64 42 monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xor_monotonic(ptr %a) {
+; O0-LABEL: atomic64_xor_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection236:
+; O0-NEXT:    lock xorq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xor_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection158:
+; O1-NEXT:    lock xorq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xor_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection158:
+; O2-NEXT:    lock xorq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xor_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection158:
+; O3-NEXT:    lock xorq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i64 42 monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_nand_monotonic(ptr %a) {
+; O0-LABEL: atomic64_nand_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection237:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .LBB162_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %esi
+; O0-NEXT:    notl %esi
+; O0-NEXT:    # implicit-def: $rdx
+; O0-NEXT:    movl %esi, %edx
+; O0-NEXT:  .Lpcsection238:
+; O0-NEXT:    orq $-43, %rdx
+; O0-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; O0-NEXT:  .Lpcsection239:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection240:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .Lpcsection241:
+; O0-NEXT:    jne .LBB162_2
+; O0-NEXT:    jmp .LBB162_1
+; O0-NEXT:  .LBB162_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_nand_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection159:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB162_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection160:
+; O1-NEXT:    orq $-43, %rcx
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:  .Lpcsection161:
+; O1-NEXT:    jne .LBB162_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_nand_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection159:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB162_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection160:
+; O2-NEXT:    orq $-43, %rcx
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:  .Lpcsection161:
+; O2-NEXT:    jne .LBB162_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_nand_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection159:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB162_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection160:
+; O3-NEXT:    orq $-43, %rcx
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:  .Lpcsection161:
+; O3-NEXT:    jne .LBB162_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i64 42 monotonic, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xchg_acquire(ptr %a) {
+; O0-LABEL: atomic64_xchg_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection242:
+; O0-NEXT:    xchgq %rax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xchg_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection162:
+; O1-NEXT:    xchgq %rax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xchg_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection162:
+; O2-NEXT:    xchgq %rax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xchg_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection162:
+; O3-NEXT:    xchgq %rax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i64 42 acquire, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_add_acquire(ptr %a) {
+; O0-LABEL: atomic64_add_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection243:
+; O0-NEXT:    lock addq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_add_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection163:
+; O1-NEXT:    lock addq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_add_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection163:
+; O2-NEXT:    lock addq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_add_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection163:
+; O3-NEXT:    lock addq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i64 42 acquire, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_sub_acquire(ptr %a) {
+; O0-LABEL: atomic64_sub_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection244:
+; O0-NEXT:    lock subq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_sub_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection164:
+; O1-NEXT:    lock subq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_sub_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection164:
+; O2-NEXT:    lock subq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_sub_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection164:
+; O3-NEXT:    lock subq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i64 42 acquire, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_and_acquire(ptr %a) {
+; O0-LABEL: atomic64_and_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection245:
+; O0-NEXT:    lock andq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_and_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection165:
+; O1-NEXT:    lock andq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_and_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection165:
+; O2-NEXT:    lock andq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_and_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection165:
+; O3-NEXT:    lock andq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i64 42 acquire, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_or_acquire(ptr %a) {
+; O0-LABEL: atomic64_or_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection246:
+; O0-NEXT:    lock orq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_or_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection166:
+; O1-NEXT:    lock orq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_or_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection166:
+; O2-NEXT:    lock orq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_or_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection166:
+; O3-NEXT:    lock orq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i64 42 acquire, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xor_acquire(ptr %a) {
+; O0-LABEL: atomic64_xor_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection247:
+; O0-NEXT:    lock xorq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xor_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection167:
+; O1-NEXT:    lock xorq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xor_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection167:
+; O2-NEXT:    lock xorq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xor_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection167:
+; O3-NEXT:    lock xorq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i64 42 acquire, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_nand_acquire(ptr %a) {
+; O0-LABEL: atomic64_nand_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection248:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .LBB169_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %esi
+; O0-NEXT:    notl %esi
+; O0-NEXT:    # implicit-def: $rdx
+; O0-NEXT:    movl %esi, %edx
+; O0-NEXT:  .Lpcsection249:
+; O0-NEXT:    orq $-43, %rdx
+; O0-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; O0-NEXT:  .Lpcsection250:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection251:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .Lpcsection252:
+; O0-NEXT:    jne .LBB169_2
+; O0-NEXT:    jmp .LBB169_1
+; O0-NEXT:  .LBB169_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_nand_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection168:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB169_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection169:
+; O1-NEXT:    orq $-43, %rcx
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:  .Lpcsection170:
+; O1-NEXT:    jne .LBB169_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_nand_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection168:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB169_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection169:
+; O2-NEXT:    orq $-43, %rcx
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:  .Lpcsection170:
+; O2-NEXT:    jne .LBB169_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_nand_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection168:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB169_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection169:
+; O3-NEXT:    orq $-43, %rcx
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:  .Lpcsection170:
+; O3-NEXT:    jne .LBB169_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i64 42 acquire, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xchg_release(ptr %a) {
+; O0-LABEL: atomic64_xchg_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection253:
+; O0-NEXT:    xchgq %rax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xchg_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection171:
+; O1-NEXT:    xchgq %rax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xchg_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection171:
+; O2-NEXT:    xchgq %rax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xchg_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection171:
+; O3-NEXT:    xchgq %rax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i64 42 release, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_add_release(ptr %a) {
+; O0-LABEL: atomic64_add_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection254:
+; O0-NEXT:    lock addq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_add_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection172:
+; O1-NEXT:    lock addq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_add_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection172:
+; O2-NEXT:    lock addq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_add_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection172:
+; O3-NEXT:    lock addq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i64 42 release, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_sub_release(ptr %a) {
+; O0-LABEL: atomic64_sub_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection255:
+; O0-NEXT:    lock subq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_sub_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection173:
+; O1-NEXT:    lock subq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_sub_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection173:
+; O2-NEXT:    lock subq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_sub_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection173:
+; O3-NEXT:    lock subq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i64 42 release, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_and_release(ptr %a) {
+; O0-LABEL: atomic64_and_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection256:
+; O0-NEXT:    lock andq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_and_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection174:
+; O1-NEXT:    lock andq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_and_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection174:
+; O2-NEXT:    lock andq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_and_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection174:
+; O3-NEXT:    lock andq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i64 42 release, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_or_release(ptr %a) {
+; O0-LABEL: atomic64_or_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection257:
+; O0-NEXT:    lock orq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_or_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection175:
+; O1-NEXT:    lock orq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_or_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection175:
+; O2-NEXT:    lock orq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_or_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection175:
+; O3-NEXT:    lock orq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i64 42 release, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xor_release(ptr %a) {
+; O0-LABEL: atomic64_xor_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection258:
+; O0-NEXT:    lock xorq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xor_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection176:
+; O1-NEXT:    lock xorq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xor_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection176:
+; O2-NEXT:    lock xorq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xor_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection176:
+; O3-NEXT:    lock xorq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i64 42 release, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_nand_release(ptr %a) {
+; O0-LABEL: atomic64_nand_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection259:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .LBB176_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %esi
+; O0-NEXT:    notl %esi
+; O0-NEXT:    # implicit-def: $rdx
+; O0-NEXT:    movl %esi, %edx
+; O0-NEXT:  .Lpcsection260:
+; O0-NEXT:    orq $-43, %rdx
+; O0-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; O0-NEXT:  .Lpcsection261:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection262:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .Lpcsection263:
+; O0-NEXT:    jne .LBB176_2
+; O0-NEXT:    jmp .LBB176_1
+; O0-NEXT:  .LBB176_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_nand_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection177:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB176_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection178:
+; O1-NEXT:    orq $-43, %rcx
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:  .Lpcsection179:
+; O1-NEXT:    jne .LBB176_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_nand_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection177:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB176_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection178:
+; O2-NEXT:    orq $-43, %rcx
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:  .Lpcsection179:
+; O2-NEXT:    jne .LBB176_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_nand_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection177:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB176_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection178:
+; O3-NEXT:    orq $-43, %rcx
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:  .Lpcsection179:
+; O3-NEXT:    jne .LBB176_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i64 42 release, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xchg_acq_rel(ptr %a) {
+; O0-LABEL: atomic64_xchg_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection264:
+; O0-NEXT:    xchgq %rax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xchg_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection180:
+; O1-NEXT:    xchgq %rax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xchg_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection180:
+; O2-NEXT:    xchgq %rax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xchg_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection180:
+; O3-NEXT:    xchgq %rax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i64 42 acq_rel, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_add_acq_rel(ptr %a) {
+; O0-LABEL: atomic64_add_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection265:
+; O0-NEXT:    lock addq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_add_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection181:
+; O1-NEXT:    lock addq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_add_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection181:
+; O2-NEXT:    lock addq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_add_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection181:
+; O3-NEXT:    lock addq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i64 42 acq_rel, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_sub_acq_rel(ptr %a) {
+; O0-LABEL: atomic64_sub_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection266:
+; O0-NEXT:    lock subq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_sub_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection182:
+; O1-NEXT:    lock subq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_sub_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection182:
+; O2-NEXT:    lock subq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_sub_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection182:
+; O3-NEXT:    lock subq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i64 42 acq_rel, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_and_acq_rel(ptr %a) {
+; O0-LABEL: atomic64_and_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection267:
+; O0-NEXT:    lock andq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_and_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection183:
+; O1-NEXT:    lock andq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_and_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection183:
+; O2-NEXT:    lock andq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_and_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection183:
+; O3-NEXT:    lock andq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i64 42 acq_rel, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_or_acq_rel(ptr %a) {
+; O0-LABEL: atomic64_or_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection268:
+; O0-NEXT:    lock orq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_or_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection184:
+; O1-NEXT:    lock orq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_or_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection184:
+; O2-NEXT:    lock orq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_or_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection184:
+; O3-NEXT:    lock orq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i64 42 acq_rel, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xor_acq_rel(ptr %a) {
+; O0-LABEL: atomic64_xor_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection269:
+; O0-NEXT:    lock xorq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xor_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection185:
+; O1-NEXT:    lock xorq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xor_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection185:
+; O2-NEXT:    lock xorq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xor_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection185:
+; O3-NEXT:    lock xorq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i64 42 acq_rel, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_nand_acq_rel(ptr %a) {
+; O0-LABEL: atomic64_nand_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection270:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .LBB183_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %esi
+; O0-NEXT:    notl %esi
+; O0-NEXT:    # implicit-def: $rdx
+; O0-NEXT:    movl %esi, %edx
+; O0-NEXT:  .Lpcsection271:
+; O0-NEXT:    orq $-43, %rdx
+; O0-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; O0-NEXT:  .Lpcsection272:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection273:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .Lpcsection274:
+; O0-NEXT:    jne .LBB183_2
+; O0-NEXT:    jmp .LBB183_1
+; O0-NEXT:  .LBB183_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_nand_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection186:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB183_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection187:
+; O1-NEXT:    orq $-43, %rcx
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:  .Lpcsection188:
+; O1-NEXT:    jne .LBB183_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_nand_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection186:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB183_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection187:
+; O2-NEXT:    orq $-43, %rcx
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:  .Lpcsection188:
+; O2-NEXT:    jne .LBB183_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_nand_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection186:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB183_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection187:
+; O3-NEXT:    orq $-43, %rcx
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:  .Lpcsection188:
+; O3-NEXT:    jne .LBB183_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i64 42 acq_rel, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xchg_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_xchg_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:  .Lpcsection275:
+; O0-NEXT:    xchgq %rax, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xchg_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:  .Lpcsection189:
+; O1-NEXT:    xchgq %rax, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xchg_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:  .Lpcsection189:
+; O2-NEXT:    xchgq %rax, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xchg_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:  .Lpcsection189:
+; O3-NEXT:    xchgq %rax, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xchg ptr %a, i64 42 seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_add_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_add_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection276:
+; O0-NEXT:    lock addq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_add_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection190:
+; O1-NEXT:    lock addq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_add_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection190:
+; O2-NEXT:    lock addq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_add_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection190:
+; O3-NEXT:    lock addq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw add ptr %a, i64 42 seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_sub_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_sub_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection277:
+; O0-NEXT:    lock subq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_sub_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection191:
+; O1-NEXT:    lock subq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_sub_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection191:
+; O2-NEXT:    lock subq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_sub_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection191:
+; O3-NEXT:    lock subq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw sub ptr %a, i64 42 seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_and_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_and_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection278:
+; O0-NEXT:    lock andq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_and_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection192:
+; O1-NEXT:    lock andq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_and_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection192:
+; O2-NEXT:    lock andq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_and_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection192:
+; O3-NEXT:    lock andq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw and ptr %a, i64 42 seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_or_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_or_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection279:
+; O0-NEXT:    lock orq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_or_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection193:
+; O1-NEXT:    lock orq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_or_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection193:
+; O2-NEXT:    lock orq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_or_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection193:
+; O3-NEXT:    lock orq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw or ptr %a, i64 42 seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_xor_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_xor_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:  .Lpcsection280:
+; O0-NEXT:    lock xorq $42, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_xor_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection194:
+; O1-NEXT:    lock xorq $42, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_xor_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection194:
+; O2-NEXT:    lock xorq $42, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_xor_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection194:
+; O3-NEXT:    lock xorq $42, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw xor ptr %a, i64 42 seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_nand_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_nand_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movq foo, %rax
+; O0-NEXT:  .Lpcsection281:
+; O0-NEXT:    movq (%rdi), %rax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .LBB190_1: # %atomicrmw.start
+; O0-NEXT:    # =>This Inner Loop Header: Depth=1
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT:    movl %eax, %esi
+; O0-NEXT:    notl %esi
+; O0-NEXT:    # implicit-def: $rdx
+; O0-NEXT:    movl %esi, %edx
+; O0-NEXT:  .Lpcsection282:
+; O0-NEXT:    orq $-43, %rdx
+; O0-NEXT:    lock cmpxchgq %rdx, (%rcx)
+; O0-NEXT:  .Lpcsection283:
+; O0-NEXT:    sete %cl
+; O0-NEXT:  .Lpcsection284:
+; O0-NEXT:    testb $1, %cl
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:  .Lpcsection285:
+; O0-NEXT:    jne .LBB190_2
+; O0-NEXT:    jmp .LBB190_1
+; O0-NEXT:  .LBB190_2: # %atomicrmw.end
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_nand_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:  .Lpcsection195:
+; O1-NEXT:    movq (%rdi), %rax
+; O1-NEXT:    .p2align 4, 0x90
+; O1-NEXT:  .LBB190_1: # %atomicrmw.start
+; O1-NEXT:    # =>This Inner Loop Header: Depth=1
+; O1-NEXT:    movl %eax, %ecx
+; O1-NEXT:    notl %ecx
+; O1-NEXT:  .Lpcsection196:
+; O1-NEXT:    orq $-43, %rcx
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:  .Lpcsection197:
+; O1-NEXT:    jne .LBB190_1
+; O1-NEXT:  # %bb.2: # %atomicrmw.end
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_nand_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:  .Lpcsection195:
+; O2-NEXT:    movq (%rdi), %rax
+; O2-NEXT:    .p2align 4, 0x90
+; O2-NEXT:  .LBB190_1: # %atomicrmw.start
+; O2-NEXT:    # =>This Inner Loop Header: Depth=1
+; O2-NEXT:    movl %eax, %ecx
+; O2-NEXT:    notl %ecx
+; O2-NEXT:  .Lpcsection196:
+; O2-NEXT:    orq $-43, %rcx
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:  .Lpcsection197:
+; O2-NEXT:    jne .LBB190_1
+; O2-NEXT:  # %bb.2: # %atomicrmw.end
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_nand_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:  .Lpcsection195:
+; O3-NEXT:    movq (%rdi), %rax
+; O3-NEXT:    .p2align 4, 0x90
+; O3-NEXT:  .LBB190_1: # %atomicrmw.start
+; O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; O3-NEXT:    movl %eax, %ecx
+; O3-NEXT:    notl %ecx
+; O3-NEXT:  .Lpcsection196:
+; O3-NEXT:    orq $-43, %rcx
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:  .Lpcsection197:
+; O3-NEXT:    jne .LBB190_1
+; O3-NEXT:  # %bb.2: # %atomicrmw.end
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = atomicrmw nand ptr %a, i64 42 seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_cas_monotonic(ptr %a) {
+; O0-LABEL: atomic64_cas_monotonic:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection286:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection287:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_cas_monotonic:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_cas_monotonic:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_cas_monotonic:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i64 42, i64 1 monotonic monotonic, align 8, !pcsections !0
+  %y = cmpxchg ptr %a, i64 42, i64 1 monotonic acquire, align 8, !pcsections !0
+  %z = cmpxchg ptr %a, i64 42, i64 1 monotonic seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_cas_acquire(ptr %a) {
+; O0-LABEL: atomic64_cas_acquire:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection288:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection289:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_cas_acquire:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_cas_acquire:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_cas_acquire:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i64 42, i64 1 acquire monotonic, align 8, !pcsections !0
+  %y = cmpxchg ptr %a, i64 42, i64 1 acquire acquire, align 8, !pcsections !0
+  %z = cmpxchg ptr %a, i64 42, i64 1 acquire seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_cas_release(ptr %a) {
+; O0-LABEL: atomic64_cas_release:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection290:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection291:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_cas_release:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_cas_release:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_cas_release:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i64 42, i64 1 release monotonic, align 8, !pcsections !0
+  %y = cmpxchg ptr %a, i64 42, i64 1 release acquire, align 8, !pcsections !0
+  %z = cmpxchg ptr %a, i64 42, i64 1 release seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_cas_acq_rel(ptr %a) {
+; O0-LABEL: atomic64_cas_acq_rel:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection292:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection293:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_cas_acq_rel:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_cas_acq_rel:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_cas_acq_rel:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i64 42, i64 1 acq_rel monotonic, align 8, !pcsections !0
+  %y = cmpxchg ptr %a, i64 42, i64 1 acq_rel acquire, align 8, !pcsections !0
+  %z = cmpxchg ptr %a, i64 42, i64 1 acq_rel seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_cas_seq_cst(ptr %a) {
+; O0-LABEL: atomic64_cas_seq_cst:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq foo(%rip), %rax
+; O0-NEXT:    movl $42, %eax
+; O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT:    movl $1, %ecx
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection294:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:  .Lpcsection295:
+; O0-NEXT:    # kill: def $rdx killed $rax
+; O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O0-NEXT:    movq $3, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_cas_seq_cst:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq foo(%rip), %rax
+; O1-NEXT:    movl $1, %ecx
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movl $42, %eax
+; O1-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O1-NEXT:    movq $3, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_cas_seq_cst:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq foo(%rip), %rax
+; O2-NEXT:    movl $1, %ecx
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movl $42, %eax
+; O2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O2-NEXT:    movq $3, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_cas_seq_cst:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq foo(%rip), %rax
+; O3-NEXT:    movl $1, %ecx
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movl $42, %eax
+; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; O3-NEXT:    movq $3, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, i64 42, i64 1 seq_cst monotonic, align 8, !pcsections !0
+  %y = cmpxchg ptr %a, i64 42, i64 1 seq_cst acquire, align 8, !pcsections !0
+  %z = cmpxchg ptr %a, i64 42, i64 1 seq_cst seq_cst, align 8, !pcsections !0
+  store volatile i64 3, ptr @foo, align 8
+  ret void
+}
+
+define void @atomic64_cas_seq_cst_ptr_ty(ptr %a, ptr %v1, ptr %v2) {
+; O0-LABEL: atomic64_cas_seq_cst_ptr_ty:
+; O0:       # %bb.0: # %entry
+; O0-NEXT:    movq %rsi, %rax
+; O0-NEXT:    movq foo(%rip), %rcx
+; O0-NEXT:    lock cmpxchgq %rdx, (%rdi)
+; O0-NEXT:    movq $1, foo
+; O0-NEXT:    retq
+;
+; O1-LABEL: atomic64_cas_seq_cst_ptr_ty:
+; O1:       # %bb.0: # %entry
+; O1-NEXT:    movq %rsi, %rax
+; O1-NEXT:    movq foo(%rip), %rcx
+; O1-NEXT:    lock cmpxchgq %rdx, (%rdi)
+; O1-NEXT:    movq $1, foo(%rip)
+; O1-NEXT:    retq
+;
+; O2-LABEL: atomic64_cas_seq_cst_ptr_ty:
+; O2:       # %bb.0: # %entry
+; O2-NEXT:    movq %rsi, %rax
+; O2-NEXT:    movq foo(%rip), %rcx
+; O2-NEXT:    lock cmpxchgq %rdx, (%rdi)
+; O2-NEXT:    movq $1, foo(%rip)
+; O2-NEXT:    retq
+;
+; O3-LABEL: atomic64_cas_seq_cst_ptr_ty:
+; O3:       # %bb.0: # %entry
+; O3-NEXT:    movq %rsi, %rax
+; O3-NEXT:    movq foo(%rip), %rcx
+; O3-NEXT:    lock cmpxchgq %rdx, (%rdi)
+; O3-NEXT:    movq $1, foo(%rip)
+; O3-NEXT:    retq
+entry:
+  load volatile i64, ptr @foo, align 8
+  %x = cmpxchg ptr %a, ptr %v1, ptr %v2 seq_cst seq_cst, align 8, !pcsections !0
+  store volatile i64 1, ptr @foo, align 8
   ret void
 }
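
For readers of the generated checks above: each test attaches !pcsections only to the atomic instruction itself, and the metadata node !0 that the tests reference is defined at the end of the full test file, outside this excerpt. A minimal standalone sketch of the pattern is below; the function name and the section name in !0 are illustrative assumptions, not the ones used by the actual test.

    target triple = "x86_64-unknown-linux-gnu"

    define i64 @cas_example(ptr %a) {
    entry:
      ; Only the cmpxchg carries !pcsections; adjacent accesses stay unannotated.
      %pair = cmpxchg ptr %a, i64 42, i64 1 seq_cst seq_cst, align 8, !pcsections !0
      %old = extractvalue { i64, i1 } %pair, 0
      ret i64 %old
    }

    ; Assumed section name, for illustration only.
    !0 = !{!"__example_pcsection"}

Feeding a module like this through llc should emit a .Lpcsection label in front of the lock cmpxchgq and a matching entry in the named section, mirroring the checks shown in the diff.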
 