[llvm] [SelectionDAG] Fix copyExtraInfo where new node has entry as operand (PR #149307)

Marco Elver via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 17 06:32:09 PDT 2025


https://github.com/melver created https://github.com/llvm/llvm-project/pull/149307

Add special-case handling for when a new replacement node has the entry node as an operand, i.e. it does not depend on any other nodes.

This can be observed with the existing X86/pcsections-atomics.ll test case when targeting Haswell, where certain 128-bit atomics are transformed into arch-specific instructions, some of whose operands have no other dependencies.
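For illustration, a minimal input along the lines of the existing i128 cases can exercise the new RUN configuration added below. This is only a sketch: the function name, the exact ordering, and the !pcsections metadata shape are illustrative and not copied from the test.

  ; Sketch: a 16-byte-aligned atomic i128 load carrying !pcsections metadata.
  ; With -mcpu=haswell (AVX + cx16), such a load may be lowered to a plain
  ; 16-byte vector load whose only operand is the entry node's chain, which
  ; is the situation the new special case in copyExtraInfo handles.
  ;   llc -O3 -mcpu=haswell -mattr=cx16 < repro.ll
  define i128 @repro(ptr %a) {
  entry:
    %x = load atomic i128, ptr %a seq_cst, align 16, !pcsections !0
    ret i128 %x
  }
  !0 = !{!"somesection"}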

From e1ce4de038861fd0711ed144ece4e2ad0a38ac69 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver at google.com>
Date: Thu, 17 Jul 2025 15:17:33 +0200
Subject: [PATCH] [SelectionDAG] Fix copyExtraInfo where new node has entry as
 operand

Add special-case handling for when a new replacement node has the entry
node as an operand, i.e. it does not depend on any other nodes.

This can be observed with the existing X86/pcsections-atomics.ll test
case when targeting Haswell, where certain 128-bit atomics are
transformed into arch-specific instructions, some of whose operands
have no other dependencies.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |    9 +-
 llvm/test/CodeGen/X86/pcsections-atomics.ll   | 3378 +++++++++++++++++
 2 files changed, 3386 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 70a39eab1e720..682d93d0abf3f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13872,6 +13872,8 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
     return;
   }
 
+  const SDNode *EntrySDN = getEntryNode().getNode();
+
   // We need to copy NodeExtraInfo to all _new_ nodes that are being introduced
   // through the replacement of From with To. Otherwise, replacements of a node
   // (From) with more complex nodes (To and its operands) may result in lost
@@ -13903,9 +13905,14 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
       return true;
     if (!Visited.insert(N).second)
       return true;
-    if (getEntryNode().getNode() == N)
+    if (EntrySDN == N)
       return false;
     for (const SDValue &Op : N->op_values()) {
+      if (N == To && Op.getNode() == EntrySDN) {
+        // Special case: New node's operand is the entry node; just need to
+        // copy extra info to new node.
+        break;
+      }
       if (!Self(Self, Op.getNode()))
         return false;
     }
diff --git a/llvm/test/CodeGen/X86/pcsections-atomics.ll b/llvm/test/CodeGen/X86/pcsections-atomics.ll
index 672ebc1ec7275..69ae1f19f3200 100644
--- a/llvm/test/CodeGen/X86/pcsections-atomics.ll
+++ b/llvm/test/CodeGen/X86/pcsections-atomics.ll
@@ -9,6 +9,7 @@
 ; RUN: llc -O1 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O1
 ; RUN: llc -O2 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O2
 ; RUN: llc -O3 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O3
+; RUN: llc -O3 -mcpu=haswell -mattr=cx16 < %s | FileCheck %s --check-prefixes=HASWELL-O3
 
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -50,6 +51,14 @@ define void @mixed_atomic_non_atomic(ptr %a) {
 ; O3-NEXT:    movl $1, (%rdi)
 ; O3-NEXT:    decl (%rdi)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: mixed_atomic_non_atomic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    incl (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection0:
+; HASWELL-O3-NEXT:    movl $1, (%rdi)
+; HASWELL-O3-NEXT:    decl (%rdi)
+; HASWELL-O3-NEXT:    retq
 entry:
   ; Accesses the same location atomically and non-atomically.
   %0 = load volatile i32, ptr %a, align 4
@@ -107,6 +116,17 @@ define i64 @mixed_complex_atomic_non_atomic(ptr %a, ptr %b) {
 ; O3-NEXT:    movq %rdx, (%rsi)
 ; O3-NEXT:    addq %rcx, %rax
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: mixed_complex_atomic_non_atomic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movl $1, %eax
+; HASWELL-O3-NEXT:  .Lpcsection1:
+; HASWELL-O3-NEXT:    lock xaddq %rax, (%rdi)
+; HASWELL-O3-NEXT:    movq (%rsi), %rcx
+; HASWELL-O3-NEXT:    leaq 1(%rcx), %rdx
+; HASWELL-O3-NEXT:    movq %rdx, (%rsi)
+; HASWELL-O3-NEXT:    addq %rcx, %rax
+; HASWELL-O3-NEXT:    retq
 entry:
   %0 = atomicrmw add ptr %a, i64 1 monotonic, align 8, !pcsections !0
   %1 = load i64, ptr %b, align 8
@@ -148,6 +168,14 @@ define i8 @atomic8_load_unordered(ptr %a) {
 ; O3-NEXT:    movzbl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_load_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection2:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i8, ptr %a unordered, align 1, !pcsections !0
@@ -187,6 +215,14 @@ define i8 @atomic8_load_monotonic(ptr %a) {
 ; O3-NEXT:    movzbl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_load_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection3:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i8, ptr %a monotonic, align 1, !pcsections !0
@@ -226,6 +262,14 @@ define i8 @atomic8_load_acquire(ptr %a) {
 ; O3-NEXT:    movzbl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_load_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection4:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i8, ptr %a acquire, align 1, !pcsections !0
@@ -265,6 +309,14 @@ define i8 @atomic8_load_seq_cst(ptr %a) {
 ; O3-NEXT:    movzbl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_load_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection5:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i8, ptr %a seq_cst, align 1, !pcsections !0
@@ -304,6 +356,14 @@ define void @atomic8_store_unordered(ptr %a) {
 ; O3-NEXT:    movb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_store_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection6:
+; HASWELL-O3-NEXT:    movb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i8 42, ptr %a unordered, align 1, !pcsections !0
@@ -343,6 +403,14 @@ define void @atomic8_store_monotonic(ptr %a) {
 ; O3-NEXT:    movb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_store_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection7:
+; HASWELL-O3-NEXT:    movb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i8 42, ptr %a monotonic, align 1, !pcsections !0
@@ -382,6 +450,14 @@ define void @atomic8_store_release(ptr %a) {
 ; O3-NEXT:    movb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_store_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection8:
+; HASWELL-O3-NEXT:    movb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i8 42, ptr %a release, align 1, !pcsections !0
@@ -425,6 +501,15 @@ define void @atomic8_store_seq_cst(ptr %a) {
 ; O3-NEXT:    xchgb %al, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_store_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection9:
+; HASWELL-O3-NEXT:    xchgb %al, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i8 42, ptr %a seq_cst, align 1, !pcsections !0
@@ -468,6 +553,15 @@ define void @atomic8_xchg_monotonic(ptr %a) {
 ; O3-NEXT:    xchgb %al, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection10:
+; HASWELL-O3-NEXT:    xchgb %al, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -507,6 +601,14 @@ define void @atomic8_add_monotonic(ptr %a) {
 ; O3-NEXT:    lock addb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_add_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection11:
+; HASWELL-O3-NEXT:    lock addb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -546,6 +648,14 @@ define void @atomic8_sub_monotonic(ptr %a) {
 ; O3-NEXT:    lock subb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection12:
+; HASWELL-O3-NEXT:    lock subb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -585,6 +695,14 @@ define void @atomic8_and_monotonic(ptr %a) {
 ; O3-NEXT:    lock andb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_and_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection13:
+; HASWELL-O3-NEXT:    lock andb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -624,6 +742,14 @@ define void @atomic8_or_monotonic(ptr %a) {
 ; O3-NEXT:    lock orb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_or_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection14:
+; HASWELL-O3-NEXT:    lock orb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -663,6 +789,14 @@ define void @atomic8_xor_monotonic(ptr %a) {
 ; O3-NEXT:    lock xorb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection15:
+; HASWELL-O3-NEXT:    lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -763,6 +897,27 @@ define void @atomic8_nand_monotonic(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection16:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB16_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection17:
+; HASWELL-O3-NEXT:    notb %cl
+; HASWELL-O3-NEXT:  .Lpcsection18:
+; HASWELL-O3-NEXT:    orb $-43, %cl
+; HASWELL-O3-NEXT:  .Lpcsection19:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection20:
+; HASWELL-O3-NEXT:    jne .LBB16_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -806,6 +961,15 @@ define void @atomic8_xchg_acquire(ptr %a) {
 ; O3-NEXT:    xchgb %al, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection21:
+; HASWELL-O3-NEXT:    xchgb %al, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -845,6 +1009,14 @@ define void @atomic8_add_acquire(ptr %a) {
 ; O3-NEXT:    lock addb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_add_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection22:
+; HASWELL-O3-NEXT:    lock addb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -884,6 +1056,14 @@ define void @atomic8_sub_acquire(ptr %a) {
 ; O3-NEXT:    lock subb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection23:
+; HASWELL-O3-NEXT:    lock subb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -923,6 +1103,14 @@ define void @atomic8_and_acquire(ptr %a) {
 ; O3-NEXT:    lock andb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_and_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection24:
+; HASWELL-O3-NEXT:    lock andb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -962,6 +1150,14 @@ define void @atomic8_or_acquire(ptr %a) {
 ; O3-NEXT:    lock orb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_or_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection25:
+; HASWELL-O3-NEXT:    lock orb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1001,6 +1197,14 @@ define void @atomic8_xor_acquire(ptr %a) {
 ; O3-NEXT:    lock xorb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection26:
+; HASWELL-O3-NEXT:    lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1101,6 +1305,27 @@ define void @atomic8_nand_acquire(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection27:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB23_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection28:
+; HASWELL-O3-NEXT:    notb %cl
+; HASWELL-O3-NEXT:  .Lpcsection29:
+; HASWELL-O3-NEXT:    orb $-43, %cl
+; HASWELL-O3-NEXT:  .Lpcsection30:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection31:
+; HASWELL-O3-NEXT:    jne .LBB23_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1144,6 +1369,15 @@ define void @atomic8_xchg_release(ptr %a) {
 ; O3-NEXT:    xchgb %al, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection32:
+; HASWELL-O3-NEXT:    xchgb %al, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1183,6 +1417,14 @@ define void @atomic8_add_release(ptr %a) {
 ; O3-NEXT:    lock addb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_add_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection33:
+; HASWELL-O3-NEXT:    lock addb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1222,6 +1464,14 @@ define void @atomic8_sub_release(ptr %a) {
 ; O3-NEXT:    lock subb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection34:
+; HASWELL-O3-NEXT:    lock subb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1261,6 +1511,14 @@ define void @atomic8_and_release(ptr %a) {
 ; O3-NEXT:    lock andb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_and_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection35:
+; HASWELL-O3-NEXT:    lock andb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1300,6 +1558,14 @@ define void @atomic8_or_release(ptr %a) {
 ; O3-NEXT:    lock orb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_or_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection36:
+; HASWELL-O3-NEXT:    lock orb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1339,6 +1605,14 @@ define void @atomic8_xor_release(ptr %a) {
 ; O3-NEXT:    lock xorb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection37:
+; HASWELL-O3-NEXT:    lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1439,6 +1713,27 @@ define void @atomic8_nand_release(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection38:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB30_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection39:
+; HASWELL-O3-NEXT:    notb %cl
+; HASWELL-O3-NEXT:  .Lpcsection40:
+; HASWELL-O3-NEXT:    orb $-43, %cl
+; HASWELL-O3-NEXT:  .Lpcsection41:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection42:
+; HASWELL-O3-NEXT:    jne .LBB30_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1482,6 +1777,15 @@ define void @atomic8_xchg_acq_rel(ptr %a) {
 ; O3-NEXT:    xchgb %al, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection43:
+; HASWELL-O3-NEXT:    xchgb %al, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1521,6 +1825,14 @@ define void @atomic8_add_acq_rel(ptr %a) {
 ; O3-NEXT:    lock addb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_add_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection44:
+; HASWELL-O3-NEXT:    lock addb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1560,6 +1872,14 @@ define void @atomic8_sub_acq_rel(ptr %a) {
 ; O3-NEXT:    lock subb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection45:
+; HASWELL-O3-NEXT:    lock subb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1599,6 +1919,14 @@ define void @atomic8_and_acq_rel(ptr %a) {
 ; O3-NEXT:    lock andb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_and_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection46:
+; HASWELL-O3-NEXT:    lock andb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1638,6 +1966,14 @@ define void @atomic8_or_acq_rel(ptr %a) {
 ; O3-NEXT:    lock orb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_or_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection47:
+; HASWELL-O3-NEXT:    lock orb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1677,6 +2013,14 @@ define void @atomic8_xor_acq_rel(ptr %a) {
 ; O3-NEXT:    lock xorb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection48:
+; HASWELL-O3-NEXT:    lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1777,6 +2121,27 @@ define void @atomic8_nand_acq_rel(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection49:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB37_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection50:
+; HASWELL-O3-NEXT:    notb %cl
+; HASWELL-O3-NEXT:  .Lpcsection51:
+; HASWELL-O3-NEXT:    orb $-43, %cl
+; HASWELL-O3-NEXT:  .Lpcsection52:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection53:
+; HASWELL-O3-NEXT:    jne .LBB37_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1820,6 +2185,15 @@ define void @atomic8_xchg_seq_cst(ptr %a) {
 ; O3-NEXT:    xchgb %al, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection54:
+; HASWELL-O3-NEXT:    xchgb %al, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -1859,6 +2233,14 @@ define void @atomic8_add_seq_cst(ptr %a) {
 ; O3-NEXT:    lock addb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_add_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection55:
+; HASWELL-O3-NEXT:    lock addb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -1898,6 +2280,14 @@ define void @atomic8_sub_seq_cst(ptr %a) {
 ; O3-NEXT:    lock subb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection56:
+; HASWELL-O3-NEXT:    lock subb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -1937,6 +2327,14 @@ define void @atomic8_and_seq_cst(ptr %a) {
 ; O3-NEXT:    lock andb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_and_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection57:
+; HASWELL-O3-NEXT:    lock andb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -1976,6 +2374,14 @@ define void @atomic8_or_seq_cst(ptr %a) {
 ; O3-NEXT:    lock orb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_or_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection58:
+; HASWELL-O3-NEXT:    lock orb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -2015,6 +2421,14 @@ define void @atomic8_xor_seq_cst(ptr %a) {
 ; O3-NEXT:    lock xorb $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection59:
+; HASWELL-O3-NEXT:    lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -2115,6 +2529,27 @@ define void @atomic8_nand_seq_cst(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection60:
+; HASWELL-O3-NEXT:    movzbl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB44_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection61:
+; HASWELL-O3-NEXT:    notb %cl
+; HASWELL-O3-NEXT:  .Lpcsection62:
+; HASWELL-O3-NEXT:    orb $-43, %cl
+; HASWELL-O3-NEXT:  .Lpcsection63:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection64:
+; HASWELL-O3-NEXT:    jne .LBB44_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -2200,6 +2635,25 @@ define void @atomic8_cas_monotonic(ptr %a) {
 ; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $1, %cl
+; HASWELL-O3-NEXT:  .Lpcsection65:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection66:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection67:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection68:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection69:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection70:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i8 42, i8 1 monotonic monotonic, align 1, !pcsections !0
@@ -2287,6 +2741,25 @@ define void @atomic8_cas_acquire(ptr %a) {
 ; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $1, %cl
+; HASWELL-O3-NEXT:  .Lpcsection71:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection72:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection73:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection74:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection75:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection76:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i8 42, i8 1 acquire monotonic, align 1, !pcsections !0
@@ -2374,6 +2847,25 @@ define void @atomic8_cas_release(ptr %a) {
 ; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $1, %cl
+; HASWELL-O3-NEXT:  .Lpcsection77:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection78:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection79:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection80:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection81:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection82:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i8 42, i8 1 release monotonic, align 1, !pcsections !0
@@ -2461,6 +2953,25 @@ define void @atomic8_cas_acq_rel(ptr %a) {
 ; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $1, %cl
+; HASWELL-O3-NEXT:  .Lpcsection83:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection84:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection85:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection86:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection87:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection88:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i8 42, i8 1 acq_rel monotonic, align 1, !pcsections !0
@@ -2548,6 +3059,25 @@ define void @atomic8_cas_seq_cst(ptr %a) {
 ; O3-NEXT:    lock cmpxchgb %cl, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movb $1, %cl
+; HASWELL-O3-NEXT:  .Lpcsection89:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection90:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection91:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection92:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection93:
+; HASWELL-O3-NEXT:    movb $42, %al
+; HASWELL-O3-NEXT:  .Lpcsection94:
+; HASWELL-O3-NEXT:    lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i8 42, i8 1 seq_cst monotonic, align 1, !pcsections !0
@@ -2589,6 +3119,14 @@ define i16 @atomic16_load_unordered(ptr %a) {
 ; O3-NEXT:    movzwl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_load_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection95:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i16, ptr %a unordered, align 2, !pcsections !0
@@ -2628,6 +3166,14 @@ define i16 @atomic16_load_monotonic(ptr %a) {
 ; O3-NEXT:    movzwl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_load_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection96:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i16, ptr %a monotonic, align 2, !pcsections !0
@@ -2667,6 +3213,14 @@ define i16 @atomic16_load_acquire(ptr %a) {
 ; O3-NEXT:    movzwl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_load_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection97:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i16, ptr %a acquire, align 2, !pcsections !0
@@ -2706,6 +3260,14 @@ define i16 @atomic16_load_seq_cst(ptr %a) {
 ; O3-NEXT:    movzwl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_load_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection98:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i16, ptr %a seq_cst, align 2, !pcsections !0
@@ -2745,6 +3307,14 @@ define void @atomic16_store_unordered(ptr %a) {
 ; O3-NEXT:    movw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_store_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection99:
+; HASWELL-O3-NEXT:    movw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i16 42, ptr %a unordered, align 2, !pcsections !0
@@ -2784,6 +3354,14 @@ define void @atomic16_store_monotonic(ptr %a) {
 ; O3-NEXT:    movw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_store_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection100:
+; HASWELL-O3-NEXT:    movw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i16 42, ptr %a monotonic, align 2, !pcsections !0
@@ -2823,6 +3401,14 @@ define void @atomic16_store_release(ptr %a) {
 ; O3-NEXT:    movw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_store_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection101:
+; HASWELL-O3-NEXT:    movw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i16 42, ptr %a release, align 2, !pcsections !0
@@ -2866,6 +3452,15 @@ define void @atomic16_store_seq_cst(ptr %a) {
 ; O3-NEXT:    xchgw %ax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_store_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection102:
+; HASWELL-O3-NEXT:    xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i16 42, ptr %a seq_cst, align 2, !pcsections !0
@@ -2909,6 +3504,15 @@ define void @atomic16_xchg_monotonic(ptr %a) {
 ; O3-NEXT:    xchgw %ax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection103:
+; HASWELL-O3-NEXT:    xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -2948,6 +3552,14 @@ define void @atomic16_add_monotonic(ptr %a) {
 ; O3-NEXT:    lock addw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_add_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection104:
+; HASWELL-O3-NEXT:    lock addw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -2987,6 +3599,14 @@ define void @atomic16_sub_monotonic(ptr %a) {
 ; O3-NEXT:    lock subw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection105:
+; HASWELL-O3-NEXT:    lock subw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3026,6 +3646,14 @@ define void @atomic16_and_monotonic(ptr %a) {
 ; O3-NEXT:    lock andw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_and_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection106:
+; HASWELL-O3-NEXT:    lock andw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3065,6 +3693,14 @@ define void @atomic16_or_monotonic(ptr %a) {
 ; O3-NEXT:    lock orw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_or_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection107:
+; HASWELL-O3-NEXT:    lock orw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3104,6 +3740,14 @@ define void @atomic16_xor_monotonic(ptr %a) {
 ; O3-NEXT:    lock xorw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection108:
+; HASWELL-O3-NEXT:    lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3220,6 +3864,31 @@ define void @atomic16_nand_monotonic(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection109:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB64_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection110:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection111:
+; HASWELL-O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT:  .Lpcsection112:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT:  .Lpcsection113:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection114:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT:  .Lpcsection115:
+; HASWELL-O3-NEXT:    jne .LBB64_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3263,6 +3932,15 @@ define void @atomic16_xchg_acquire(ptr %a) {
 ; O3-NEXT:    xchgw %ax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection116:
+; HASWELL-O3-NEXT:    xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3302,6 +3980,14 @@ define void @atomic16_add_acquire(ptr %a) {
 ; O3-NEXT:    lock addw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_add_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection117:
+; HASWELL-O3-NEXT:    lock addw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3341,6 +4027,14 @@ define void @atomic16_sub_acquire(ptr %a) {
 ; O3-NEXT:    lock subw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection118:
+; HASWELL-O3-NEXT:    lock subw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3380,6 +4074,14 @@ define void @atomic16_and_acquire(ptr %a) {
 ; O3-NEXT:    lock andw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_and_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection119:
+; HASWELL-O3-NEXT:    lock andw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3419,6 +4121,14 @@ define void @atomic16_or_acquire(ptr %a) {
 ; O3-NEXT:    lock orw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_or_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection120:
+; HASWELL-O3-NEXT:    lock orw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3458,6 +4168,14 @@ define void @atomic16_xor_acquire(ptr %a) {
 ; O3-NEXT:    lock xorw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection121:
+; HASWELL-O3-NEXT:    lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3574,6 +4292,31 @@ define void @atomic16_nand_acquire(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection122:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB71_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection123:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection124:
+; HASWELL-O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT:  .Lpcsection125:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT:  .Lpcsection126:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection127:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT:  .Lpcsection128:
+; HASWELL-O3-NEXT:    jne .LBB71_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3617,6 +4360,15 @@ define void @atomic16_xchg_release(ptr %a) {
 ; O3-NEXT:    xchgw %ax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection129:
+; HASWELL-O3-NEXT:    xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3656,6 +4408,14 @@ define void @atomic16_add_release(ptr %a) {
 ; O3-NEXT:    lock addw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_add_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection130:
+; HASWELL-O3-NEXT:    lock addw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3695,6 +4455,14 @@ define void @atomic16_sub_release(ptr %a) {
 ; O3-NEXT:    lock subw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection131:
+; HASWELL-O3-NEXT:    lock subw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3734,6 +4502,14 @@ define void @atomic16_and_release(ptr %a) {
 ; O3-NEXT:    lock andw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_and_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection132:
+; HASWELL-O3-NEXT:    lock andw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3773,6 +4549,14 @@ define void @atomic16_or_release(ptr %a) {
 ; O3-NEXT:    lock orw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_or_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection133:
+; HASWELL-O3-NEXT:    lock orw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3812,6 +4596,14 @@ define void @atomic16_xor_release(ptr %a) {
 ; O3-NEXT:    lock xorw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection134:
+; HASWELL-O3-NEXT:    lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3928,6 +4720,31 @@ define void @atomic16_nand_release(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection135:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB78_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection136:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection137:
+; HASWELL-O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT:  .Lpcsection138:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT:  .Lpcsection139:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection140:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT:  .Lpcsection141:
+; HASWELL-O3-NEXT:    jne .LBB78_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3971,6 +4788,15 @@ define void @atomic16_xchg_acq_rel(ptr %a) {
 ; O3-NEXT:    xchgw %ax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection142:
+; HASWELL-O3-NEXT:    xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4010,6 +4836,14 @@ define void @atomic16_add_acq_rel(ptr %a) {
 ; O3-NEXT:    lock addw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_add_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection143:
+; HASWELL-O3-NEXT:    lock addw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4049,6 +4883,14 @@ define void @atomic16_sub_acq_rel(ptr %a) {
 ; O3-NEXT:    lock subw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection144:
+; HASWELL-O3-NEXT:    lock subw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4088,6 +4930,14 @@ define void @atomic16_and_acq_rel(ptr %a) {
 ; O3-NEXT:    lock andw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_and_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection145:
+; HASWELL-O3-NEXT:    lock andw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4127,6 +4977,14 @@ define void @atomic16_or_acq_rel(ptr %a) {
 ; O3-NEXT:    lock orw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_or_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection146:
+; HASWELL-O3-NEXT:    lock orw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4166,6 +5024,14 @@ define void @atomic16_xor_acq_rel(ptr %a) {
 ; O3-NEXT:    lock xorw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection147:
+; HASWELL-O3-NEXT:    lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4282,6 +5148,31 @@ define void @atomic16_nand_acq_rel(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection148:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB85_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection149:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection150:
+; HASWELL-O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT:  .Lpcsection151:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT:  .Lpcsection152:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection153:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT:  .Lpcsection154:
+; HASWELL-O3-NEXT:    jne .LBB85_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4325,6 +5216,15 @@ define void @atomic16_xchg_seq_cst(ptr %a) {
 ; O3-NEXT:    xchgw %ax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection155:
+; HASWELL-O3-NEXT:    xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4364,6 +5264,14 @@ define void @atomic16_add_seq_cst(ptr %a) {
 ; O3-NEXT:    lock addw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_add_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection156:
+; HASWELL-O3-NEXT:    lock addw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4403,6 +5311,14 @@ define void @atomic16_sub_seq_cst(ptr %a) {
 ; O3-NEXT:    lock subw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection157:
+; HASWELL-O3-NEXT:    lock subw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4442,6 +5358,14 @@ define void @atomic16_and_seq_cst(ptr %a) {
 ; O3-NEXT:    lock andw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_and_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection158:
+; HASWELL-O3-NEXT:    lock andw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4481,6 +5405,14 @@ define void @atomic16_or_seq_cst(ptr %a) {
 ; O3-NEXT:    lock orw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_or_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection159:
+; HASWELL-O3-NEXT:    lock orw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4520,6 +5452,14 @@ define void @atomic16_xor_seq_cst(ptr %a) {
 ; O3-NEXT:    lock xorw $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection160:
+; HASWELL-O3-NEXT:    lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4636,6 +5576,31 @@ define void @atomic16_nand_seq_cst(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection161:
+; HASWELL-O3-NEXT:    movzwl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB92_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection162:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection163:
+; HASWELL-O3-NEXT:    orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT:  .Lpcsection164:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT:  .Lpcsection165:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection166:
+; HASWELL-O3-NEXT:    # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT:  .Lpcsection167:
+; HASWELL-O3-NEXT:    jne .LBB92_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4712,6 +5677,22 @@ define void @atomic16_cas_monotonic(ptr %a) {
 ; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $1, %cx
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection168:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection169:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection170:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i16 42, i16 1 monotonic monotonic, align 2, !pcsections !0
@@ -4790,6 +5771,22 @@ define void @atomic16_cas_acquire(ptr %a) {
 ; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $1, %cx
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection171:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection172:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection173:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i16 42, i16 1 acquire monotonic, align 2, !pcsections !0
@@ -4868,6 +5865,22 @@ define void @atomic16_cas_release(ptr %a) {
 ; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $1, %cx
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection174:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection175:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection176:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i16 42, i16 1 release monotonic, align 2, !pcsections !0
@@ -4946,6 +5959,22 @@ define void @atomic16_cas_acq_rel(ptr %a) {
 ; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $1, %cx
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection177:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection178:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection179:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i16 42, i16 1 acq_rel monotonic, align 2, !pcsections !0
@@ -5024,6 +6053,22 @@ define void @atomic16_cas_seq_cst(ptr %a) {
 ; O3-NEXT:    lock cmpxchgw %cx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movw $1, %cx
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection180:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection181:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movw $42, %ax
+; HASWELL-O3-NEXT:  .Lpcsection182:
+; HASWELL-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i16 42, i16 1 seq_cst monotonic, align 2, !pcsections !0
@@ -5065,6 +6110,14 @@ define i32 @atomic32_load_unordered(ptr %a) {
 ; O3-NEXT:    movl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_load_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection183:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i32, ptr %a unordered, align 4, !pcsections !0
@@ -5104,6 +6157,14 @@ define i32 @atomic32_load_monotonic(ptr %a) {
 ; O3-NEXT:    movl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_load_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection184:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i32, ptr %a monotonic, align 4, !pcsections !0
@@ -5143,6 +6204,14 @@ define i32 @atomic32_load_acquire(ptr %a) {
 ; O3-NEXT:    movl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_load_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection185:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i32, ptr %a acquire, align 4, !pcsections !0
@@ -5182,6 +6251,14 @@ define i32 @atomic32_load_seq_cst(ptr %a) {
 ; O3-NEXT:    movl (%rdi), %eax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_load_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection186:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i32, ptr %a seq_cst, align 4, !pcsections !0
@@ -5221,6 +6298,14 @@ define void @atomic32_store_unordered(ptr %a) {
 ; O3-NEXT:    movl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_store_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection187:
+; HASWELL-O3-NEXT:    movl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i32 42, ptr %a unordered, align 4, !pcsections !0
@@ -5260,6 +6345,14 @@ define void @atomic32_store_monotonic(ptr %a) {
 ; O3-NEXT:    movl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_store_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection188:
+; HASWELL-O3-NEXT:    movl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i32 42, ptr %a monotonic, align 4, !pcsections !0
@@ -5299,6 +6392,14 @@ define void @atomic32_store_release(ptr %a) {
 ; O3-NEXT:    movl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_store_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection189:
+; HASWELL-O3-NEXT:    movl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i32 42, ptr %a release, align 4, !pcsections !0
@@ -5342,6 +6443,15 @@ define void @atomic32_store_seq_cst(ptr %a) {
 ; O3-NEXT:    xchgl %eax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_store_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection190:
+; HASWELL-O3-NEXT:    xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i32 42, ptr %a seq_cst, align 4, !pcsections !0
@@ -5385,6 +6495,15 @@ define void @atomic32_xchg_monotonic(ptr %a) {
 ; O3-NEXT:    xchgl %eax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection191:
+; HASWELL-O3-NEXT:    xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5424,6 +6543,14 @@ define void @atomic32_add_monotonic(ptr %a) {
 ; O3-NEXT:    lock addl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_add_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection192:
+; HASWELL-O3-NEXT:    lock addl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5463,6 +6590,14 @@ define void @atomic32_sub_monotonic(ptr %a) {
 ; O3-NEXT:    lock subl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection193:
+; HASWELL-O3-NEXT:    lock subl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5502,6 +6637,14 @@ define void @atomic32_and_monotonic(ptr %a) {
 ; O3-NEXT:    lock andl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_and_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection194:
+; HASWELL-O3-NEXT:    lock andl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5541,6 +6684,14 @@ define void @atomic32_or_monotonic(ptr %a) {
 ; O3-NEXT:    lock orl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_or_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection195:
+; HASWELL-O3-NEXT:    lock orl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5580,6 +6731,14 @@ define void @atomic32_xor_monotonic(ptr %a) {
 ; O3-NEXT:    lock xorl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection196:
+; HASWELL-O3-NEXT:    lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5680,6 +6839,27 @@ define void @atomic32_nand_monotonic(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection197:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB112_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection198:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection199:
+; HASWELL-O3-NEXT:    orl $-43, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection200:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection201:
+; HASWELL-O3-NEXT:    jne .LBB112_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5723,6 +6903,15 @@ define void @atomic32_xchg_acquire(ptr %a) {
 ; O3-NEXT:    xchgl %eax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection202:
+; HASWELL-O3-NEXT:    xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5762,6 +6951,14 @@ define void @atomic32_add_acquire(ptr %a) {
 ; O3-NEXT:    lock addl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_add_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection203:
+; HASWELL-O3-NEXT:    lock addl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5801,6 +6998,14 @@ define void @atomic32_sub_acquire(ptr %a) {
 ; O3-NEXT:    lock subl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection204:
+; HASWELL-O3-NEXT:    lock subl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5840,6 +7045,14 @@ define void @atomic32_and_acquire(ptr %a) {
 ; O3-NEXT:    lock andl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_and_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection205:
+; HASWELL-O3-NEXT:    lock andl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5879,6 +7092,14 @@ define void @atomic32_or_acquire(ptr %a) {
 ; O3-NEXT:    lock orl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_or_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection206:
+; HASWELL-O3-NEXT:    lock orl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5918,6 +7139,14 @@ define void @atomic32_xor_acquire(ptr %a) {
 ; O3-NEXT:    lock xorl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection207:
+; HASWELL-O3-NEXT:    lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -6018,6 +7247,27 @@ define void @atomic32_nand_acquire(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection208:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB119_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection209:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection210:
+; HASWELL-O3-NEXT:    orl $-43, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection211:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection212:
+; HASWELL-O3-NEXT:    jne .LBB119_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -6061,6 +7311,15 @@ define void @atomic32_xchg_release(ptr %a) {
 ; O3-NEXT:    xchgl %eax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection213:
+; HASWELL-O3-NEXT:    xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6100,6 +7359,14 @@ define void @atomic32_add_release(ptr %a) {
 ; O3-NEXT:    lock addl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_add_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection214:
+; HASWELL-O3-NEXT:    lock addl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6139,6 +7406,14 @@ define void @atomic32_sub_release(ptr %a) {
 ; O3-NEXT:    lock subl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection215:
+; HASWELL-O3-NEXT:    lock subl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6178,6 +7453,14 @@ define void @atomic32_and_release(ptr %a) {
 ; O3-NEXT:    lock andl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_and_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection216:
+; HASWELL-O3-NEXT:    lock andl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6217,6 +7500,14 @@ define void @atomic32_or_release(ptr %a) {
 ; O3-NEXT:    lock orl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_or_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection217:
+; HASWELL-O3-NEXT:    lock orl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6256,6 +7547,14 @@ define void @atomic32_xor_release(ptr %a) {
 ; O3-NEXT:    lock xorl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection218:
+; HASWELL-O3-NEXT:    lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6356,6 +7655,27 @@ define void @atomic32_nand_release(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection219:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB126_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection220:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection221:
+; HASWELL-O3-NEXT:    orl $-43, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection222:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection223:
+; HASWELL-O3-NEXT:    jne .LBB126_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6399,6 +7719,15 @@ define void @atomic32_xchg_acq_rel(ptr %a) {
 ; O3-NEXT:    xchgl %eax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection224:
+; HASWELL-O3-NEXT:    xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6438,6 +7767,14 @@ define void @atomic32_add_acq_rel(ptr %a) {
 ; O3-NEXT:    lock addl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_add_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection225:
+; HASWELL-O3-NEXT:    lock addl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6477,6 +7814,14 @@ define void @atomic32_sub_acq_rel(ptr %a) {
 ; O3-NEXT:    lock subl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection226:
+; HASWELL-O3-NEXT:    lock subl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6516,6 +7861,14 @@ define void @atomic32_and_acq_rel(ptr %a) {
 ; O3-NEXT:    lock andl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_and_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection227:
+; HASWELL-O3-NEXT:    lock andl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6555,6 +7908,14 @@ define void @atomic32_or_acq_rel(ptr %a) {
 ; O3-NEXT:    lock orl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_or_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection228:
+; HASWELL-O3-NEXT:    lock orl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6594,6 +7955,14 @@ define void @atomic32_xor_acq_rel(ptr %a) {
 ; O3-NEXT:    lock xorl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection229:
+; HASWELL-O3-NEXT:    lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6694,6 +8063,27 @@ define void @atomic32_nand_acq_rel(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection230:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB133_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection231:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection232:
+; HASWELL-O3-NEXT:    orl $-43, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection233:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection234:
+; HASWELL-O3-NEXT:    jne .LBB133_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6737,6 +8127,15 @@ define void @atomic32_xchg_seq_cst(ptr %a) {
 ; O3-NEXT:    xchgl %eax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection235:
+; HASWELL-O3-NEXT:    xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6776,6 +8175,14 @@ define void @atomic32_add_seq_cst(ptr %a) {
 ; O3-NEXT:    lock addl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_add_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection236:
+; HASWELL-O3-NEXT:    lock addl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6815,6 +8222,14 @@ define void @atomic32_sub_seq_cst(ptr %a) {
 ; O3-NEXT:    lock subl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection237:
+; HASWELL-O3-NEXT:    lock subl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6854,6 +8269,14 @@ define void @atomic32_and_seq_cst(ptr %a) {
 ; O3-NEXT:    lock andl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_and_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection238:
+; HASWELL-O3-NEXT:    lock andl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6893,6 +8316,14 @@ define void @atomic32_or_seq_cst(ptr %a) {
 ; O3-NEXT:    lock orl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_or_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection239:
+; HASWELL-O3-NEXT:    lock orl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6932,6 +8363,14 @@ define void @atomic32_xor_seq_cst(ptr %a) {
 ; O3-NEXT:    lock xorl $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection240:
+; HASWELL-O3-NEXT:    lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -7032,6 +8471,27 @@ define void @atomic32_nand_seq_cst(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection241:
+; HASWELL-O3-NEXT:    movl (%rdi), %eax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB140_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection242:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection243:
+; HASWELL-O3-NEXT:    orl $-43, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection244:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection245:
+; HASWELL-O3-NEXT:    jne .LBB140_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -7117,6 +8577,25 @@ define void @atomic32_cas_monotonic(ptr %a) {
 ; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection246:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection247:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection248:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection249:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection250:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection251:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i32 42, i32 1 monotonic monotonic, align 4, !pcsections !0
@@ -7204,6 +8683,25 @@ define void @atomic32_cas_acquire(ptr %a) {
 ; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection252:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection253:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection254:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection255:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection256:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection257:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i32 42, i32 1 acquire monotonic, align 4, !pcsections !0
@@ -7291,6 +8789,25 @@ define void @atomic32_cas_release(ptr %a) {
 ; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection258:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection259:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection260:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection261:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection262:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection263:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i32 42, i32 1 release monotonic, align 4, !pcsections !0
@@ -7378,6 +8895,25 @@ define void @atomic32_cas_acq_rel(ptr %a) {
 ; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection264:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection265:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection266:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection267:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection268:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection269:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i32 42, i32 1 acq_rel monotonic, align 4, !pcsections !0
@@ -7465,6 +9001,25 @@ define void @atomic32_cas_seq_cst(ptr %a) {
 ; O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection270:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection271:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection272:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection273:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection274:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection275:
+; HASWELL-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i32 42, i32 1 seq_cst monotonic, align 4, !pcsections !0
@@ -7506,6 +9061,14 @@ define i64 @atomic64_load_unordered(ptr %a) {
 ; O3-NEXT:    movq (%rdi), %rax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_load_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection276:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i64, ptr %a unordered, align 8, !pcsections !0
@@ -7545,6 +9108,14 @@ define i64 @atomic64_load_monotonic(ptr %a) {
 ; O3-NEXT:    movq (%rdi), %rax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_load_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection277:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i64, ptr %a monotonic, align 8, !pcsections !0
@@ -7584,6 +9155,14 @@ define i64 @atomic64_load_acquire(ptr %a) {
 ; O3-NEXT:    movq (%rdi), %rax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_load_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection278:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i64, ptr %a acquire, align 8, !pcsections !0
@@ -7623,6 +9202,14 @@ define i64 @atomic64_load_seq_cst(ptr %a) {
 ; O3-NEXT:    movq (%rdi), %rax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_load_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection279:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i64, ptr %a seq_cst, align 8, !pcsections !0
@@ -7662,6 +9249,14 @@ define ptr @atomic64_load_seq_cst_ptr_ty(ptr %a) {
 ; O3-NEXT:    movq (%rdi), %rax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_load_seq_cst_ptr_ty:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection280:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic ptr, ptr %a seq_cst, align 8, !pcsections !0
@@ -7701,6 +9296,14 @@ define void @atomic64_store_unordered(ptr %a) {
 ; O3-NEXT:    movq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_store_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection281:
+; HASWELL-O3-NEXT:    movq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i64 42, ptr %a unordered, align 8, !pcsections !0
@@ -7740,6 +9343,14 @@ define void @atomic64_store_monotonic(ptr %a) {
 ; O3-NEXT:    movq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_store_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection282:
+; HASWELL-O3-NEXT:    movq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i64 42, ptr %a monotonic, align 8, !pcsections !0
@@ -7779,6 +9390,14 @@ define void @atomic64_store_release(ptr %a) {
 ; O3-NEXT:    movq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_store_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection283:
+; HASWELL-O3-NEXT:    movq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i64 42, ptr %a release, align 8, !pcsections !0
@@ -7822,6 +9441,15 @@ define void @atomic64_store_seq_cst(ptr %a) {
 ; O3-NEXT:    xchgq %rax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_store_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection284:
+; HASWELL-O3-NEXT:    xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i64 42, ptr %a seq_cst, align 8, !pcsections !0
@@ -7861,6 +9489,14 @@ define void @atomic64_store_seq_cst_ptr_ty(ptr %a, ptr %v) {
 ; O3-NEXT:    xchgq %rsi, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_store_seq_cst_ptr_ty:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection285:
+; HASWELL-O3-NEXT:    xchgq %rsi, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic ptr %v, ptr %a seq_cst, align 8, !pcsections !0
@@ -7904,6 +9540,15 @@ define void @atomic64_xchg_monotonic(ptr %a) {
 ; O3-NEXT:    xchgq %rax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection286:
+; HASWELL-O3-NEXT:    xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -7943,6 +9588,14 @@ define void @atomic64_add_monotonic(ptr %a) {
 ; O3-NEXT:    lock addq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_add_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection287:
+; HASWELL-O3-NEXT:    lock addq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -7982,6 +9635,14 @@ define void @atomic64_sub_monotonic(ptr %a) {
 ; O3-NEXT:    lock subq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection288:
+; HASWELL-O3-NEXT:    lock subq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8021,6 +9682,14 @@ define void @atomic64_and_monotonic(ptr %a) {
 ; O3-NEXT:    lock andq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_and_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection289:
+; HASWELL-O3-NEXT:    lock andq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8060,6 +9729,14 @@ define void @atomic64_or_monotonic(ptr %a) {
 ; O3-NEXT:    lock orq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_or_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection290:
+; HASWELL-O3-NEXT:    lock orq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8099,6 +9776,14 @@ define void @atomic64_xor_monotonic(ptr %a) {
 ; O3-NEXT:    lock xorq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection291:
+; HASWELL-O3-NEXT:    lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8202,6 +9887,27 @@ define void @atomic64_nand_monotonic(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection292:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB162_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection293:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection294:
+; HASWELL-O3-NEXT:    orq $-43, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection295:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection296:
+; HASWELL-O3-NEXT:    jne .LBB162_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8245,6 +9951,15 @@ define void @atomic64_xchg_acquire(ptr %a) {
 ; O3-NEXT:    xchgq %rax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection297:
+; HASWELL-O3-NEXT:    xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8284,6 +9999,14 @@ define void @atomic64_add_acquire(ptr %a) {
 ; O3-NEXT:    lock addq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_add_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection298:
+; HASWELL-O3-NEXT:    lock addq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8323,6 +10046,14 @@ define void @atomic64_sub_acquire(ptr %a) {
 ; O3-NEXT:    lock subq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection299:
+; HASWELL-O3-NEXT:    lock subq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8362,6 +10093,14 @@ define void @atomic64_and_acquire(ptr %a) {
 ; O3-NEXT:    lock andq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_and_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection300:
+; HASWELL-O3-NEXT:    lock andq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8401,6 +10140,14 @@ define void @atomic64_or_acquire(ptr %a) {
 ; O3-NEXT:    lock orq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_or_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection301:
+; HASWELL-O3-NEXT:    lock orq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8440,6 +10187,14 @@ define void @atomic64_xor_acquire(ptr %a) {
 ; O3-NEXT:    lock xorq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection302:
+; HASWELL-O3-NEXT:    lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8543,6 +10298,27 @@ define void @atomic64_nand_acquire(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection303:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB169_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection304:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection305:
+; HASWELL-O3-NEXT:    orq $-43, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection306:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection307:
+; HASWELL-O3-NEXT:    jne .LBB169_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8586,6 +10362,15 @@ define void @atomic64_xchg_release(ptr %a) {
 ; O3-NEXT:    xchgq %rax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection308:
+; HASWELL-O3-NEXT:    xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8625,6 +10410,14 @@ define void @atomic64_add_release(ptr %a) {
 ; O3-NEXT:    lock addq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_add_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection309:
+; HASWELL-O3-NEXT:    lock addq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8664,6 +10457,14 @@ define void @atomic64_sub_release(ptr %a) {
 ; O3-NEXT:    lock subq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection310:
+; HASWELL-O3-NEXT:    lock subq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8703,6 +10504,14 @@ define void @atomic64_and_release(ptr %a) {
 ; O3-NEXT:    lock andq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_and_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection311:
+; HASWELL-O3-NEXT:    lock andq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8742,6 +10551,14 @@ define void @atomic64_or_release(ptr %a) {
 ; O3-NEXT:    lock orq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_or_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection312:
+; HASWELL-O3-NEXT:    lock orq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8781,6 +10598,14 @@ define void @atomic64_xor_release(ptr %a) {
 ; O3-NEXT:    lock xorq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection313:
+; HASWELL-O3-NEXT:    lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8884,6 +10709,27 @@ define void @atomic64_nand_release(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection314:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB176_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection315:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection316:
+; HASWELL-O3-NEXT:    orq $-43, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection317:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection318:
+; HASWELL-O3-NEXT:    jne .LBB176_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8927,6 +10773,15 @@ define void @atomic64_xchg_acq_rel(ptr %a) {
 ; O3-NEXT:    xchgq %rax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection319:
+; HASWELL-O3-NEXT:    xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -8966,6 +10821,14 @@ define void @atomic64_add_acq_rel(ptr %a) {
 ; O3-NEXT:    lock addq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_add_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection320:
+; HASWELL-O3-NEXT:    lock addq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9005,6 +10868,14 @@ define void @atomic64_sub_acq_rel(ptr %a) {
 ; O3-NEXT:    lock subq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection321:
+; HASWELL-O3-NEXT:    lock subq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9044,6 +10915,14 @@ define void @atomic64_and_acq_rel(ptr %a) {
 ; O3-NEXT:    lock andq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_and_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection322:
+; HASWELL-O3-NEXT:    lock andq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9083,6 +10962,14 @@ define void @atomic64_or_acq_rel(ptr %a) {
 ; O3-NEXT:    lock orq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_or_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection323:
+; HASWELL-O3-NEXT:    lock orq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9122,6 +11009,14 @@ define void @atomic64_xor_acq_rel(ptr %a) {
 ; O3-NEXT:    lock xorq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection324:
+; HASWELL-O3-NEXT:    lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9225,6 +11120,27 @@ define void @atomic64_nand_acq_rel(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection325:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB183_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection326:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection327:
+; HASWELL-O3-NEXT:    orq $-43, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection328:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection329:
+; HASWELL-O3-NEXT:    jne .LBB183_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9268,6 +11184,15 @@ define void @atomic64_xchg_seq_cst(ptr %a) {
 ; O3-NEXT:    xchgq %rax, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection330:
+; HASWELL-O3-NEXT:    xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9307,6 +11232,14 @@ define void @atomic64_add_seq_cst(ptr %a) {
 ; O3-NEXT:    lock addq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_add_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection331:
+; HASWELL-O3-NEXT:    lock addq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9346,6 +11279,14 @@ define void @atomic64_sub_seq_cst(ptr %a) {
 ; O3-NEXT:    lock subq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection332:
+; HASWELL-O3-NEXT:    lock subq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9385,6 +11326,14 @@ define void @atomic64_and_seq_cst(ptr %a) {
 ; O3-NEXT:    lock andq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_and_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection333:
+; HASWELL-O3-NEXT:    lock andq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9424,6 +11373,14 @@ define void @atomic64_or_seq_cst(ptr %a) {
 ; O3-NEXT:    lock orq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_or_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection334:
+; HASWELL-O3-NEXT:    lock orq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9463,6 +11420,14 @@ define void @atomic64_xor_seq_cst(ptr %a) {
 ; O3-NEXT:    lock xorq $42, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection335:
+; HASWELL-O3-NEXT:    lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9566,6 +11531,27 @@ define void @atomic64_nand_seq_cst(ptr %a) {
 ; O3-NEXT:  # %bb.2: # %atomicrmw.end
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection336:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB190_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection337:
+; HASWELL-O3-NEXT:    notl %ecx
+; HASWELL-O3-NEXT:  .Lpcsection338:
+; HASWELL-O3-NEXT:    orq $-43, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection339:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection340:
+; HASWELL-O3-NEXT:    jne .LBB190_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9651,6 +11637,25 @@ define void @atomic64_cas_monotonic(ptr %a) {
 ; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection341:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection342:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection343:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection344:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection345:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection346:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i64 42, i64 1 monotonic monotonic, align 8, !pcsections !0
@@ -9738,6 +11743,25 @@ define void @atomic64_cas_acquire(ptr %a) {
 ; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection347:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection348:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection349:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection350:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection351:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection352:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i64 42, i64 1 acquire monotonic, align 8, !pcsections !0
@@ -9825,6 +11849,25 @@ define void @atomic64_cas_release(ptr %a) {
 ; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection353:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection354:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection355:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection356:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection357:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection358:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i64 42, i64 1 release monotonic, align 8, !pcsections !0
@@ -9912,6 +11955,25 @@ define void @atomic64_cas_acq_rel(ptr %a) {
 ; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection359:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection360:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection361:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection362:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection363:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection364:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i64 42, i64 1 acq_rel monotonic, align 8, !pcsections !0
@@ -9999,6 +12061,25 @@ define void @atomic64_cas_seq_cst(ptr %a) {
 ; O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
 ; O3-NEXT:    movq $3, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:    movl $1, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection365:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection366:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection367:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection368:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection369:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection370:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT:    movq $3, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i64 42, i64 1 seq_cst monotonic, align 8, !pcsections !0
@@ -10044,6 +12125,15 @@ define void @atomic64_cas_seq_cst_ptr_ty(ptr %a, ptr %v1, ptr %v2) {
 ; O3-NEXT:    lock cmpxchgq %rdx, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_seq_cst_ptr_ty:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq %rsi, %rax
+; HASWELL-O3-NEXT:    movq foo(%rip), %rcx
+; HASWELL-O3-NEXT:  .Lpcsection371:
+; HASWELL-O3-NEXT:    lock cmpxchgq %rdx, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, ptr %v1, ptr %v2 seq_cst seq_cst, align 8, !pcsections !0
@@ -10102,6 +12192,18 @@ define i64 @atomic_use_cond(ptr %a) {
 ; O3-NEXT:  .LBB197_2: # %else
 ; O3-NEXT:    movl $2, %eax
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic_use_cond:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:  .Lpcsection372:
+; HASWELL-O3-NEXT:    lock decq (%rdi)
+; HASWELL-O3-NEXT:    jne .LBB197_2
+; HASWELL-O3-NEXT:  # %bb.1: # %then
+; HASWELL-O3-NEXT:    movl $1, %eax
+; HASWELL-O3-NEXT:    retq
+; HASWELL-O3-NEXT:  .LBB197_2: # %else
+; HASWELL-O3-NEXT:    movl $2, %eax
+; HASWELL-O3-NEXT:    retq
 entry:
   %x = atomicrmw sub ptr %a, i64 1 seq_cst, align 8, !pcsections !0
   %y = icmp eq i64 %x, 1
@@ -10196,6 +12298,18 @@ define i128 @atomic128_load_unordered(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_load_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection373:
+; HASWELL-O3-NEXT:    vmovdqa (%rdi), %xmm0
+; HASWELL-O3-NEXT:  .Lpcsection374:
+; HASWELL-O3-NEXT:    vmovq %xmm0, %rax
+; HASWELL-O3-NEXT:  .Lpcsection375:
+; HASWELL-O3-NEXT:    vpextrq $1, %xmm0, %rdx
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i128, ptr %a unordered, align 16, !pcsections !0
@@ -10285,6 +12399,18 @@ define i128 @atomic128_load_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_load_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection376:
+; HASWELL-O3-NEXT:    vmovdqa (%rdi), %xmm0
+; HASWELL-O3-NEXT:  .Lpcsection377:
+; HASWELL-O3-NEXT:    vmovq %xmm0, %rax
+; HASWELL-O3-NEXT:  .Lpcsection378:
+; HASWELL-O3-NEXT:    vpextrq $1, %xmm0, %rdx
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i128, ptr %a monotonic, align 16, !pcsections !0
@@ -10374,6 +12500,18 @@ define i128 @atomic128_load_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_load_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection379:
+; HASWELL-O3-NEXT:    vmovdqa (%rdi), %xmm0
+; HASWELL-O3-NEXT:  .Lpcsection380:
+; HASWELL-O3-NEXT:    vmovq %xmm0, %rax
+; HASWELL-O3-NEXT:  .Lpcsection381:
+; HASWELL-O3-NEXT:    vpextrq $1, %xmm0, %rdx
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i128, ptr %a acquire, align 16, !pcsections !0
@@ -10463,6 +12601,18 @@ define i128 @atomic128_load_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_load_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection382:
+; HASWELL-O3-NEXT:    vmovdqa (%rdi), %xmm0
+; HASWELL-O3-NEXT:  .Lpcsection383:
+; HASWELL-O3-NEXT:    vmovq %xmm0, %rax
+; HASWELL-O3-NEXT:  .Lpcsection384:
+; HASWELL-O3-NEXT:    vpextrq $1, %xmm0, %rdx
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic i128, ptr %a seq_cst, align 16, !pcsections !0
@@ -10502,6 +12652,14 @@ define ptr @atomic128_load_seq_cst_ptr_ty(ptr %a) {
 ; O3-NEXT:    movq (%rdi), %rax
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_load_seq_cst_ptr_ty:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection385:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = load atomic ptr, ptr %a seq_cst, align 16, !pcsections !0
@@ -10629,6 +12787,16 @@ define void @atomic128_store_unordered(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_store_unordered:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection386:
+; HASWELL-O3-NEXT:    vmovss {{.*#+}} xmm0 = [42,0,0,0]
+; HASWELL-O3-NEXT:  .Lpcsection387:
+; HASWELL-O3-NEXT:    vmovaps %xmm0, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i128 42, ptr %a unordered, align 16, !pcsections !0
@@ -10756,6 +12924,16 @@ define void @atomic128_store_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_store_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection388:
+; HASWELL-O3-NEXT:    vmovss {{.*#+}} xmm0 = [42,0,0,0]
+; HASWELL-O3-NEXT:  .Lpcsection389:
+; HASWELL-O3-NEXT:    vmovaps %xmm0, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i128 42, ptr %a monotonic, align 16, !pcsections !0
@@ -10883,6 +13061,16 @@ define void @atomic128_store_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_store_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection390:
+; HASWELL-O3-NEXT:    vmovss {{.*#+}} xmm0 = [42,0,0,0]
+; HASWELL-O3-NEXT:  .Lpcsection391:
+; HASWELL-O3-NEXT:    vmovaps %xmm0, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i128 42, ptr %a release, align 16, !pcsections !0
@@ -11010,6 +13198,18 @@ define void @atomic128_store_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_store_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection392:
+; HASWELL-O3-NEXT:    vmovss {{.*#+}} xmm0 = [42,0,0,0]
+; HASWELL-O3-NEXT:  .Lpcsection393:
+; HASWELL-O3-NEXT:    vmovaps %xmm0, (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection394:
+; HASWELL-O3-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic i128 42, ptr %a seq_cst, align 16, !pcsections !0
@@ -11049,6 +13249,14 @@ define void @atomic128_store_seq_cst_ptr_ty(ptr %a, ptr %v) {
 ; O3-NEXT:    xchgq %rsi, (%rdi)
 ; O3-NEXT:    movq $1, foo(%rip)
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_store_seq_cst_ptr_ty:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection395:
+; HASWELL-O3-NEXT:    xchgq %rsi, (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   store atomic ptr %v, ptr %a seq_cst, align 16, !pcsections !0
@@ -11176,6 +13384,33 @@ define void @atomic128_xchg_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection396:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection397:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection398:
+; HASWELL-O3-NEXT:    movl $42, %ebx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB208_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:  .Lpcsection399:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection400:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection401:
+; HASWELL-O3-NEXT:    jne .LBB208_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11309,6 +13544,35 @@ define void @atomic128_add_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_add_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection402:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection403:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB209_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection404:
+; HASWELL-O3-NEXT:    addq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection405:
+; HASWELL-O3-NEXT:    adcq $0, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection406:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection407:
+; HASWELL-O3-NEXT:    jne .LBB209_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11442,6 +13706,35 @@ define void @atomic128_sub_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection408:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection409:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB210_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection410:
+; HASWELL-O3-NEXT:    addq $-42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection411:
+; HASWELL-O3-NEXT:    adcq $-1, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection412:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection413:
+; HASWELL-O3-NEXT:    jne .LBB210_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11574,6 +13867,34 @@ define void @atomic128_and_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_and_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection414:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection415:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB211_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection416:
+; HASWELL-O3-NEXT:    andl $42, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection417:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection418:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection419:
+; HASWELL-O3-NEXT:    jne .LBB211_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11699,6 +14020,33 @@ define void @atomic128_or_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_or_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection420:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection421:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB212_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection422:
+; HASWELL-O3-NEXT:    orq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection423:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection424:
+; HASWELL-O3-NEXT:    jne .LBB212_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11824,6 +14172,33 @@ define void @atomic128_xor_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection425:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection426:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB213_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection427:
+; HASWELL-O3-NEXT:    xorq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection428:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection429:
+; HASWELL-O3-NEXT:    jne .LBB213_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11964,6 +14339,36 @@ define void @atomic128_nand_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection430:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection431:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection432:
+; HASWELL-O3-NEXT:    movq $-1, %rcx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB214_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection433:
+; HASWELL-O3-NEXT:    notl %ebx
+; HASWELL-O3-NEXT:  .Lpcsection434:
+; HASWELL-O3-NEXT:    orq $-43, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection435:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection436:
+; HASWELL-O3-NEXT:    jne .LBB214_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -12091,6 +14496,33 @@ define void @atomic128_xchg_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection437:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection438:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection439:
+; HASWELL-O3-NEXT:    movl $42, %ebx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB215_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:  .Lpcsection440:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection441:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection442:
+; HASWELL-O3-NEXT:    jne .LBB215_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12224,6 +14656,35 @@ define void @atomic128_add_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_add_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection443:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection444:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB216_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection445:
+; HASWELL-O3-NEXT:    addq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection446:
+; HASWELL-O3-NEXT:    adcq $0, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection447:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection448:
+; HASWELL-O3-NEXT:    jne .LBB216_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12357,6 +14818,35 @@ define void @atomic128_sub_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection449:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection450:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB217_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection451:
+; HASWELL-O3-NEXT:    addq $-42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection452:
+; HASWELL-O3-NEXT:    adcq $-1, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection453:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection454:
+; HASWELL-O3-NEXT:    jne .LBB217_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12489,6 +14979,34 @@ define void @atomic128_and_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_and_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection455:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection456:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB218_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection457:
+; HASWELL-O3-NEXT:    andl $42, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection458:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection459:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection460:
+; HASWELL-O3-NEXT:    jne .LBB218_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12614,6 +15132,33 @@ define void @atomic128_or_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_or_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection461:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection462:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB219_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection463:
+; HASWELL-O3-NEXT:    orq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection464:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection465:
+; HASWELL-O3-NEXT:    jne .LBB219_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12739,6 +15284,33 @@ define void @atomic128_xor_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection466:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection467:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB220_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection468:
+; HASWELL-O3-NEXT:    xorq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection469:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection470:
+; HASWELL-O3-NEXT:    jne .LBB220_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12879,6 +15451,36 @@ define void @atomic128_nand_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection471:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection472:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection473:
+; HASWELL-O3-NEXT:    movq $-1, %rcx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB221_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection474:
+; HASWELL-O3-NEXT:    notl %ebx
+; HASWELL-O3-NEXT:  .Lpcsection475:
+; HASWELL-O3-NEXT:    orq $-43, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection476:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection477:
+; HASWELL-O3-NEXT:    jne .LBB221_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -13006,6 +15608,33 @@ define void @atomic128_xchg_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_release:
+; HASWELL-O3:       # %bb.0:
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection478:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection479:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection480:
+; HASWELL-O3-NEXT:    movl $42, %ebx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB222_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:  .Lpcsection481:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection482:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection483:
+; HASWELL-O3-NEXT:    jne .LBB222_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i128 42 release, align 16, !pcsections !0
   store volatile i64 1, ptr @foo, align 8
@@ -13138,6 +15767,35 @@ define void @atomic128_add_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_add_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection484:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection485:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB223_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection486:
+; HASWELL-O3-NEXT:    addq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection487:
+; HASWELL-O3-NEXT:    adcq $0, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection488:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection489:
+; HASWELL-O3-NEXT:    jne .LBB223_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13271,6 +15929,35 @@ define void @atomic128_sub_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection490:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection491:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB224_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection492:
+; HASWELL-O3-NEXT:    addq $-42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection493:
+; HASWELL-O3-NEXT:    adcq $-1, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection494:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection495:
+; HASWELL-O3-NEXT:    jne .LBB224_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13403,6 +16090,34 @@ define void @atomic128_and_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_and_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection496:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection497:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB225_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection498:
+; HASWELL-O3-NEXT:    andl $42, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection499:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection500:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection501:
+; HASWELL-O3-NEXT:    jne .LBB225_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13528,6 +16243,33 @@ define void @atomic128_or_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_or_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection502:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection503:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB226_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection504:
+; HASWELL-O3-NEXT:    orq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection505:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection506:
+; HASWELL-O3-NEXT:    jne .LBB226_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13653,6 +16395,33 @@ define void @atomic128_xor_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection507:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection508:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB227_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection509:
+; HASWELL-O3-NEXT:    xorq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection510:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection511:
+; HASWELL-O3-NEXT:    jne .LBB227_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13793,6 +16562,36 @@ define void @atomic128_nand_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection512:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection513:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection514:
+; HASWELL-O3-NEXT:    movq $-1, %rcx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB228_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection515:
+; HASWELL-O3-NEXT:    notl %ebx
+; HASWELL-O3-NEXT:  .Lpcsection516:
+; HASWELL-O3-NEXT:    orq $-43, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection517:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection518:
+; HASWELL-O3-NEXT:    jne .LBB228_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13920,6 +16719,33 @@ define void @atomic128_xchg_acq_rel(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection519:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection520:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection521:
+; HASWELL-O3-NEXT:    movl $42, %ebx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB229_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:  .Lpcsection522:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection523:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection524:
+; HASWELL-O3-NEXT:    jne .LBB229_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14053,6 +16879,35 @@ define void @atomic128_add_acq_rel(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_add_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection525:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection526:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB230_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection527:
+; HASWELL-O3-NEXT:    addq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection528:
+; HASWELL-O3-NEXT:    adcq $0, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection529:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection530:
+; HASWELL-O3-NEXT:    jne .LBB230_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14186,6 +17041,35 @@ define void @atomic128_sub_acq_rel(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection531:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection532:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB231_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection533:
+; HASWELL-O3-NEXT:    addq $-42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection534:
+; HASWELL-O3-NEXT:    adcq $-1, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection535:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection536:
+; HASWELL-O3-NEXT:    jne .LBB231_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14318,6 +17202,34 @@ define void @atomic128_and_acq_rel(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_and_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection537:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection538:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB232_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection539:
+; HASWELL-O3-NEXT:    andl $42, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection540:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection541:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection542:
+; HASWELL-O3-NEXT:    jne .LBB232_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14443,6 +17355,33 @@ define void @atomic128_or_acq_rel(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_or_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection543:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection544:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB233_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection545:
+; HASWELL-O3-NEXT:    orq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection546:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection547:
+; HASWELL-O3-NEXT:    jne .LBB233_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14568,6 +17507,33 @@ define void @atomic128_xor_acq_rel(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection548:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection549:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB234_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection550:
+; HASWELL-O3-NEXT:    xorq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection551:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection552:
+; HASWELL-O3-NEXT:    jne .LBB234_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14708,6 +17674,36 @@ define void @atomic128_nand_acq_rel(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection553:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection554:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection555:
+; HASWELL-O3-NEXT:    movq $-1, %rcx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB235_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection556:
+; HASWELL-O3-NEXT:    notl %ebx
+; HASWELL-O3-NEXT:  .Lpcsection557:
+; HASWELL-O3-NEXT:    orq $-43, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection558:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection559:
+; HASWELL-O3-NEXT:    jne .LBB235_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14835,6 +17831,33 @@ define void @atomic128_xchg_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection560:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection561:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection562:
+; HASWELL-O3-NEXT:    movl $42, %ebx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB236_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:  .Lpcsection563:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection564:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection565:
+; HASWELL-O3-NEXT:    jne .LBB236_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xchg ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -14968,6 +17991,35 @@ define void @atomic128_add_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_add_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection566:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection567:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB237_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection568:
+; HASWELL-O3-NEXT:    addq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection569:
+; HASWELL-O3-NEXT:    adcq $0, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection570:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection571:
+; HASWELL-O3-NEXT:    jne .LBB237_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw add ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15101,6 +18153,35 @@ define void @atomic128_sub_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection572:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection573:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB238_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection574:
+; HASWELL-O3-NEXT:    addq $-42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection575:
+; HASWELL-O3-NEXT:    adcq $-1, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection576:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection577:
+; HASWELL-O3-NEXT:    jne .LBB238_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw sub ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15233,6 +18314,34 @@ define void @atomic128_and_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_and_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection578:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection579:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB239_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection580:
+; HASWELL-O3-NEXT:    andl $42, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection581:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection582:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection583:
+; HASWELL-O3-NEXT:    jne .LBB239_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw and ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15358,6 +18467,33 @@ define void @atomic128_or_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_or_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection584:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection585:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB240_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection586:
+; HASWELL-O3-NEXT:    orq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection587:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection588:
+; HASWELL-O3-NEXT:    jne .LBB240_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw or ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15483,6 +18619,33 @@ define void @atomic128_xor_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection589:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection590:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB241_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movq %rax, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection591:
+; HASWELL-O3-NEXT:    xorq $42, %rbx
+; HASWELL-O3-NEXT:    movq %rdx, %rcx
+; HASWELL-O3-NEXT:  .Lpcsection592:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection593:
+; HASWELL-O3-NEXT:    jne .LBB241_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw xor ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15623,6 +18786,36 @@ define void @atomic128_nand_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection594:
+; HASWELL-O3-NEXT:    movq (%rdi), %rax
+; HASWELL-O3-NEXT:  .Lpcsection595:
+; HASWELL-O3-NEXT:    movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT:  .Lpcsection596:
+; HASWELL-O3-NEXT:    movq $-1, %rcx
+; HASWELL-O3-NEXT:    .p2align 4
+; HASWELL-O3-NEXT:  .LBB242_1: # %atomicrmw.start
+; HASWELL-O3-NEXT:    # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT:    movl %eax, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection597:
+; HASWELL-O3-NEXT:    notl %ebx
+; HASWELL-O3-NEXT:  .Lpcsection598:
+; HASWELL-O3-NEXT:    orq $-43, %rbx
+; HASWELL-O3-NEXT:  .Lpcsection599:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection600:
+; HASWELL-O3-NEXT:    jne .LBB242_1
+; HASWELL-O3-NEXT:  # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = atomicrmw nand ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15781,6 +18974,43 @@ define void @atomic128_cas_monotonic(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_monotonic:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection601:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection602:
+; HASWELL-O3-NEXT:    movl $1, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection603:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection604:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection605:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection606:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection607:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection608:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection609:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection610:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection611:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection612:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection613:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i128 42, i128 1 monotonic monotonic, align 16, !pcsections !0
@@ -15941,6 +19171,43 @@ define void @atomic128_cas_acquire(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_acquire:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection614:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection615:
+; HASWELL-O3-NEXT:    movl $1, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection616:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection617:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection618:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection619:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection620:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection621:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection622:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection623:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection624:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection625:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection626:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i128 42, i128 1 acquire monotonic, align 16, !pcsections !0
@@ -16101,6 +19368,43 @@ define void @atomic128_cas_release(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_release:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection627:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection628:
+; HASWELL-O3-NEXT:    movl $1, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection629:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection630:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection631:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection632:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection633:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection634:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection635:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection636:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection637:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection638:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection639:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i128 42, i128 1 release monotonic, align 16, !pcsections !0
@@ -16261,6 +19565,43 @@ define void @atomic128_cas_acq_rel(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_acq_rel:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection640:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection641:
+; HASWELL-O3-NEXT:    movl $1, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection642:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection643:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection644:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection645:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection646:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection647:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection648:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection649:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection650:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection651:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection652:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i128 42, i128 1 acq_rel monotonic, align 16, !pcsections !0
@@ -16421,6 +19762,43 @@ define void @atomic128_cas_seq_cst(ptr %a) {
 ; O3-NEXT:    popq %rbx
 ; O3-NEXT:    .cfi_def_cfa_offset 8
 ; O3-NEXT:    retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_seq_cst:
+; HASWELL-O3:       # %bb.0: # %entry
+; HASWELL-O3-NEXT:    pushq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT:    .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT:    movq foo(%rip), %rax
+; HASWELL-O3-NEXT:  .Lpcsection653:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection654:
+; HASWELL-O3-NEXT:    movl $1, %ebx
+; HASWELL-O3-NEXT:  .Lpcsection655:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection656:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection657:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection658:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection659:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection660:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection661:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:  .Lpcsection662:
+; HASWELL-O3-NEXT:    movl $42, %eax
+; HASWELL-O3-NEXT:  .Lpcsection663:
+; HASWELL-O3-NEXT:    xorl %edx, %edx
+; HASWELL-O3-NEXT:  .Lpcsection664:
+; HASWELL-O3-NEXT:    xorl %ecx, %ecx
+; HASWELL-O3-NEXT:  .Lpcsection665:
+; HASWELL-O3-NEXT:    lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT:    movq $1, foo(%rip)
+; HASWELL-O3-NEXT:    popq %rbx
+; HASWELL-O3-NEXT:    .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT:    retq
 entry:
   load volatile i64, ptr @foo, align 8
   %x = cmpxchg ptr %a, i128 42, i128 1 seq_cst monotonic, align 16, !pcsections !0



More information about the llvm-commits mailing list