[llvm] r371790 - [Test] Restructure check lines to show differences between modes more clearly

Philip Reames via llvm-commits <llvm-commits at lists.llvm.org>
Thu Sep 12 16:22:37 PDT 2019


Author: reames
Date: Thu Sep 12 16:22:37 2019
New Revision: 371790

URL: http://llvm.org/viewvc/llvm-project?rev=371790&view=rev
Log:
[Test] Restructure check lines to show differences between modes more clearly

With the previous patches landed (in particular D66318), there are a lot fewer diffs between the modes now.  I added an experimental -O0 RUN line, and updated all the tests to group the experimental and non-experimental O0/O3 check lines together.

Skimming the remaining diffs, there are only a few which are obviously incorrect.  A large number are questionable, so there is more to do.
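For reference, the prefixes now nest from generic to specific: output common to all four runs is checked under CHECK, output shared by both -O0 (or both -O3) runs under CHECK-O0 (or CHECK-O3), and output unique to one configuration under CHECK-O0-CUR, CHECK-O0-EX, CHECK-O3-CUR, or CHECK-O3-EX.  A minimal sketch of the scheme (illustrative only; the function name and instructions are hypothetical, not from the patch):

  ; RUN: llc -O3 < %s ... | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
  ; RUN: llc -O3 < %s ... -x86-experimental-unordered-atomic-isel | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
  ;
  ; CHECK-LABEL: example:             <- active in every run
  ; CHECK-O3:    movq (%rdi), %rax    <- both -O3 runs agree here
  ; CHECK-O3-EX: addq (%rsi), %rax    <- checked only in the experimental -O3 run

Since FileCheck only matches directives whose prefix appears in that run's --check-prefixes list, the CHECK-O3-EX line above is simply ignored by the non-experimental run, which is what lets update_llc_test_checks.py collapse identical bodies under the shared prefix and emit per-mode blocks only where the output actually differs.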


Modified:
    llvm/trunk/test/CodeGen/X86/atomic-unordered.ll

Modified: llvm/trunk/test/CodeGen/X86/atomic-unordered.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-unordered.ll?rev=371790&r1=371789&r2=371790&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-unordered.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-unordered.ll Thu Sep 12 16:22:37 2019
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-NOX,CHECK-O0 %s
-; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-NOX,CHECK-O3 %s
-; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel | FileCheck --check-prefixes=CHECK,CHECK-EX %s
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
 
 define i8 @load_i8(i8* %ptr) {
 ; CHECK-LABEL: load_i8:
@@ -23,11 +24,6 @@ define void @store_i8(i8* %ptr, i8 %v) {
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movb %sil, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: store_i8:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movb %sil, (%rdi)
-; CHECK-EX-NEXT:    retq
   store atomic i8 %v, i8* %ptr unordered, align 1
   ret void
 }
@@ -42,11 +38,6 @@ define i16 @load_i16(i16* %ptr) {
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movzwl (%rdi), %eax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_i16:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movzwl (%rdi), %eax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i16, i16* %ptr unordered, align 2
   ret i16 %v
 }
@@ -63,11 +54,6 @@ define void @store_i16(i16* %ptr, i16 %v
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movw %si, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: store_i16:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movw %si, (%rdi)
-; CHECK-EX-NEXT:    retq
   store atomic i16 %v, i16* %ptr unordered, align 2
   ret void
 }
@@ -127,11 +113,6 @@ define void @narrow_writeback_or(i64* %p
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    orq $7, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: narrow_writeback_or:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    orq $7, (%rdi)
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %ptr unordered, align 8
   %v.new = or i64 %v, 7
   store atomic i64 %v.new, i64* %ptr unordered, align 8
@@ -154,12 +135,6 @@ define void @narrow_writeback_and(i64* %
 ; CHECK-O3-NEXT:    movl $4294967040, %eax # imm = 0xFFFFFF00
 ; CHECK-O3-NEXT:    andq %rax, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: narrow_writeback_and:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movl $4294967040, %eax # imm = 0xFFFFFF00
-; CHECK-EX-NEXT:    andq %rax, (%rdi)
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %ptr unordered, align 8
   %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00
   store atomic i64 %v.new, i64* %ptr unordered, align 8
@@ -179,11 +154,6 @@ define void @narrow_writeback_xor(i64* %
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    xorq $7, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: narrow_writeback_xor:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    xorq $7, (%rdi)
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %ptr unordered, align 8
   %v.new = xor i64 %v, 7
   store atomic i64 %v.new, i64* %ptr unordered, align 8
@@ -194,7 +164,6 @@ define void @narrow_writeback_xor(i64* %
 ;; improve codegeneration.  Note that widening is only legal if the
 ;; resulting type would be atomic.  Each tests has a well aligned, and
 ;; unaligned variant to ensure we get correct codegen here.
-;;
 ;; Note: It's not a legality issue, but there's a gotcha here to be aware
 ;; of.  Once we widen a pair of atomic stores, we loose the information
 ;; that the original atomicity requirement was half the width.  Given that,
@@ -285,20 +254,6 @@ define i128 @load_i128(i128* %ptr) {
 ; CHECK-O3-NEXT:    popq %rbx
 ; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_i128:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    pushq %rbx
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-EX-NEXT:    .cfi_offset %rbx, -16
-; CHECK-EX-NEXT:    xorl %eax, %eax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    xorl %ecx, %ecx
-; CHECK-EX-NEXT:    xorl %ebx, %ebx
-; CHECK-EX-NEXT:    lock cmpxchg16b (%rdi)
-; CHECK-EX-NEXT:    popq %rbx
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-EX-NEXT:    retq
   %v = load atomic i128, i128* %ptr unordered, align 16
   ret i128 %v
 }
@@ -352,25 +307,6 @@ define void @store_i128(i128* %ptr, i128
 ; CHECK-O3-NEXT:    popq %rbx
 ; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: store_i128:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    pushq %rbx
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-EX-NEXT:    .cfi_offset %rbx, -16
-; CHECK-EX-NEXT:    movq %rdx, %rcx
-; CHECK-EX-NEXT:    movq %rsi, %rbx
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq 8(%rdi), %rdx
-; CHECK-EX-NEXT:    .p2align 4, 0x90
-; CHECK-EX-NEXT:  .LBB16_1: # %atomicrmw.start
-; CHECK-EX-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-EX-NEXT:    lock cmpxchg16b (%rdi)
-; CHECK-EX-NEXT:    jne .LBB16_1
-; CHECK-EX-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-EX-NEXT:    popq %rbx
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-EX-NEXT:    retq
   store atomic i128 %v, i128* %ptr unordered, align 16
   ret void
 }
@@ -424,28 +360,6 @@ define i256 @load_i256(i256* %ptr) {
 ; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O3-NEXT:    vzeroupper
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_i256:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    pushq %rbx
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-EX-NEXT:    subq $32, %rsp
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-EX-NEXT:    .cfi_offset %rbx, -16
-; CHECK-EX-NEXT:    movq %rdi, %rbx
-; CHECK-EX-NEXT:    movq %rsp, %rdx
-; CHECK-EX-NEXT:    movl $32, %edi
-; CHECK-EX-NEXT:    xorl %ecx, %ecx
-; CHECK-EX-NEXT:    callq __atomic_load
-; CHECK-EX-NEXT:    vmovups (%rsp), %ymm0
-; CHECK-EX-NEXT:    vmovups %ymm0, (%rbx)
-; CHECK-EX-NEXT:    movq %rbx, %rax
-; CHECK-EX-NEXT:    addq $32, %rsp
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-EX-NEXT:    popq %rbx
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-EX-NEXT:    vzeroupper
-; CHECK-EX-NEXT:    retq
   %v = load atomic i256, i256* %ptr unordered, align 16
   ret i256 %v
 }
@@ -489,43 +403,39 @@ define void @store_i256(i256* %ptr, i256
 ; CHECK-O3-NEXT:    addq $40, %rsp
 ; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: store_i256:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    subq $40, %rsp
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-EX-NEXT:    movq %rdi, %rax
-; CHECK-EX-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
-; CHECK-EX-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
-; CHECK-EX-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-EX-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; CHECK-EX-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-EX-NEXT:    movl $32, %edi
-; CHECK-EX-NEXT:    movq %rax, %rsi
-; CHECK-EX-NEXT:    xorl %ecx, %ecx
-; CHECK-EX-NEXT:    callq __atomic_store
-; CHECK-EX-NEXT:    addq $40, %rsp
-; CHECK-EX-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-EX-NEXT:    retq
   store atomic i256 %v, i256* %ptr unordered, align 16
   ret void
 }
 
 ; Legal if wider type is also atomic (TODO)
 define void @vec_store(i32* %p0, <2 x i32> %vec) {
-; CHECK-NOX-LABEL: vec_store:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    vmovd %xmm0, %eax
-; CHECK-NOX-NEXT:    vpextrd $1, %xmm0, %ecx
-; CHECK-NOX-NEXT:    movl %eax, (%rdi)
-; CHECK-NOX-NEXT:    movl %ecx, 4(%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: vec_store:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    vmovss %xmm0, (%rdi)
-; CHECK-EX-NEXT:    vextractps $1, %xmm0, 4(%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-CUR-LABEL: vec_store:
+; CHECK-O0-CUR:       # %bb.0:
+; CHECK-O0-CUR-NEXT:    vmovd %xmm0, %eax
+; CHECK-O0-CUR-NEXT:    vpextrd $1, %xmm0, %ecx
+; CHECK-O0-CUR-NEXT:    movl %eax, (%rdi)
+; CHECK-O0-CUR-NEXT:    movl %ecx, 4(%rdi)
+; CHECK-O0-CUR-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: vec_store:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    vmovd %xmm0, %eax
+; CHECK-O3-CUR-NEXT:    vpextrd $1, %xmm0, %ecx
+; CHECK-O3-CUR-NEXT:    movl %eax, (%rdi)
+; CHECK-O3-CUR-NEXT:    movl %ecx, 4(%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O0-EX-LABEL: vec_store:
+; CHECK-O0-EX:       # %bb.0:
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, (%rdi)
+; CHECK-O0-EX-NEXT:    vpextrd $1, %xmm0, 4(%rdi)
+; CHECK-O0-EX-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: vec_store:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    vmovss %xmm0, (%rdi)
+; CHECK-O3-EX-NEXT:    vextractps $1, %xmm0, 4(%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %v1 = extractelement <2 x i32> %vec, i32 0
   %v2 = extractelement <2 x i32> %vec, i32 1
   %p1 = getelementptr i32, i32* %p0, i64 1
@@ -536,19 +446,33 @@ define void @vec_store(i32* %p0, <2 x i3
 
 ; Not legal to widen due to alignment restriction
 define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) {
-; CHECK-NOX-LABEL: vec_store_unaligned:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    vmovd %xmm0, %eax
-; CHECK-NOX-NEXT:    vpextrd $1, %xmm0, %ecx
-; CHECK-NOX-NEXT:    movl %eax, (%rdi)
-; CHECK-NOX-NEXT:    movl %ecx, 4(%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: vec_store_unaligned:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    vmovss %xmm0, (%rdi)
-; CHECK-EX-NEXT:    vextractps $1, %xmm0, 4(%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-CUR-LABEL: vec_store_unaligned:
+; CHECK-O0-CUR:       # %bb.0:
+; CHECK-O0-CUR-NEXT:    vmovd %xmm0, %eax
+; CHECK-O0-CUR-NEXT:    vpextrd $1, %xmm0, %ecx
+; CHECK-O0-CUR-NEXT:    movl %eax, (%rdi)
+; CHECK-O0-CUR-NEXT:    movl %ecx, 4(%rdi)
+; CHECK-O0-CUR-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: vec_store_unaligned:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    vmovd %xmm0, %eax
+; CHECK-O3-CUR-NEXT:    vpextrd $1, %xmm0, %ecx
+; CHECK-O3-CUR-NEXT:    movl %eax, (%rdi)
+; CHECK-O3-CUR-NEXT:    movl %ecx, 4(%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O0-EX-LABEL: vec_store_unaligned:
+; CHECK-O0-EX:       # %bb.0:
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, (%rdi)
+; CHECK-O0-EX-NEXT:    vpextrd $1, %xmm0, 4(%rdi)
+; CHECK-O0-EX-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: vec_store_unaligned:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    vmovss %xmm0, (%rdi)
+; CHECK-O3-EX-NEXT:    vextractps $1, %xmm0, 4(%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %v1 = extractelement <2 x i32> %vec, i32 0
   %v2 = extractelement <2 x i32> %vec, i32 1
   %p1 = getelementptr i32, i32* %p0, i64 1
@@ -562,18 +486,31 @@ define void @vec_store_unaligned(i32* %p
 ; Legal if wider type is also atomic (TODO)
 ; Also, can avoid register move from xmm to eax (TODO)
 define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) {
-; CHECK-NOX-LABEL: widen_broadcast2:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    vmovd %xmm0, %eax
-; CHECK-NOX-NEXT:    movl %eax, (%rdi)
-; CHECK-NOX-NEXT:    movl %eax, 4(%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: widen_broadcast2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    vmovss %xmm0, (%rdi)
-; CHECK-EX-NEXT:    vmovss %xmm0, 4(%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-CUR-LABEL: widen_broadcast2:
+; CHECK-O0-CUR:       # %bb.0:
+; CHECK-O0-CUR-NEXT:    vmovd %xmm0, %eax
+; CHECK-O0-CUR-NEXT:    movl %eax, (%rdi)
+; CHECK-O0-CUR-NEXT:    movl %eax, 4(%rdi)
+; CHECK-O0-CUR-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: widen_broadcast2:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    vmovd %xmm0, %eax
+; CHECK-O3-CUR-NEXT:    movl %eax, (%rdi)
+; CHECK-O3-CUR-NEXT:    movl %eax, 4(%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O0-EX-LABEL: widen_broadcast2:
+; CHECK-O0-EX:       # %bb.0:
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, (%rdi)
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, 4(%rdi)
+; CHECK-O0-EX-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: widen_broadcast2:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    vmovss %xmm0, (%rdi)
+; CHECK-O3-EX-NEXT:    vmovss %xmm0, 4(%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %v1 = extractelement <2 x i32> %vec, i32 0
   %p1 = getelementptr i32, i32* %p0, i64 1
   store atomic i32 %v1, i32* %p0 unordered, align 8
@@ -583,18 +520,31 @@ define void @widen_broadcast2(i32* %p0,
 
 ; Not legal to widen due to alignment restriction
 define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) {
-; CHECK-NOX-LABEL: widen_broadcast2_unaligned:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    vmovd %xmm0, %eax
-; CHECK-NOX-NEXT:    movl %eax, (%rdi)
-; CHECK-NOX-NEXT:    movl %eax, 4(%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: widen_broadcast2_unaligned:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    vmovss %xmm0, (%rdi)
-; CHECK-EX-NEXT:    vmovss %xmm0, 4(%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-CUR-LABEL: widen_broadcast2_unaligned:
+; CHECK-O0-CUR:       # %bb.0:
+; CHECK-O0-CUR-NEXT:    vmovd %xmm0, %eax
+; CHECK-O0-CUR-NEXT:    movl %eax, (%rdi)
+; CHECK-O0-CUR-NEXT:    movl %eax, 4(%rdi)
+; CHECK-O0-CUR-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: widen_broadcast2_unaligned:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    vmovd %xmm0, %eax
+; CHECK-O3-CUR-NEXT:    movl %eax, (%rdi)
+; CHECK-O3-CUR-NEXT:    movl %eax, 4(%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O0-EX-LABEL: widen_broadcast2_unaligned:
+; CHECK-O0-EX:       # %bb.0:
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, (%rdi)
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, 4(%rdi)
+; CHECK-O0-EX-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: widen_broadcast2_unaligned:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    vmovss %xmm0, (%rdi)
+; CHECK-O3-EX-NEXT:    vmovss %xmm0, 4(%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %v1 = extractelement <2 x i32> %vec, i32 0
   %p1 = getelementptr i32, i32* %p0, i64 1
   store atomic i32 %v1, i32* %p0 unordered, align 4
@@ -655,12 +605,6 @@ define i64 @load_fold_add2(i64* %p, i64
 ; CHECK-O3-NEXT:    movq %rsi, %rax
 ; CHECK-O3-NEXT:    addq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_add2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    addq (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = add i64 %v, %v2
   ret i64 %ret
@@ -673,17 +617,17 @@ define i64 @load_fold_add3(i64* %p1, i64
 ; CHECK-O0-NEXT:    addq (%rsi), %rax
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: load_fold_add3:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    addq (%rdi), %rax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_add3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    addq (%rsi), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: load_fold_add3:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rsi), %rax
+; CHECK-O3-CUR-NEXT:    addq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_fold_add3:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    addq (%rsi), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = add i64 %v, %v2
@@ -703,12 +647,6 @@ define i64 @load_fold_sub1(i64* %p) {
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
 ; CHECK-O3-NEXT:    addq $-15, %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_sub1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    addq $-15, %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = sub i64 %v, 15
   ret i64 %ret
@@ -750,13 +688,6 @@ define i64 @load_fold_mul1(i64* %p) {
 ; CHECK-O3-NEXT:    leaq (%rax,%rax,4), %rax
 ; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_mul1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    leaq (%rax,%rax,4), %rax
-; CHECK-EX-NEXT:    leaq (%rax,%rax,2), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = mul i64 %v, 15
   ret i64 %ret
@@ -774,12 +705,6 @@ define i64 @load_fold_mul2(i64* %p, i64
 ; CHECK-O3-NEXT:    movq %rsi, %rax
 ; CHECK-O3-NEXT:    imulq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_mul2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    imulq (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = mul i64 %v, %v2
   ret i64 %ret
@@ -792,17 +717,17 @@ define i64 @load_fold_mul3(i64* %p1, i64
 ; CHECK-O0-NEXT:    imulq (%rsi), %rax
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: load_fold_mul3:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    imulq (%rdi), %rax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_mul3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    imulq (%rsi), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: load_fold_mul3:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rsi), %rax
+; CHECK-O3-CUR-NEXT:    imulq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_fold_mul3:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    imulq (%rsi), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = mul i64 %v, %v2
@@ -831,19 +756,6 @@ define i64 @load_fold_sdiv1(i64* %p) {
 ; CHECK-O3-NEXT:    sarq $3, %rdx
 ; CHECK-O3-NEXT:    addq %rdx, %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_sdiv1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rcx
-; CHECK-EX-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-EX-NEXT:    movq %rcx, %rax
-; CHECK-EX-NEXT:    imulq %rdx
-; CHECK-EX-NEXT:    addq %rcx, %rdx
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    shrq $63, %rax
-; CHECK-EX-NEXT:    sarq $3, %rdx
-; CHECK-EX-NEXT:    addq %rdx, %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = sdiv i64 %v, 15
   ret i64 %ret
@@ -875,24 +787,6 @@ define i64 @load_fold_sdiv2(i64* %p, i64
 ; CHECK-O3-NEXT:    divl %esi
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_sdiv2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    orq %rsi, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    je .LBB35_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    cqto
-; CHECK-EX-NEXT:    idivq %rsi
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB35_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %esi
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax def $rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = sdiv i64 %v, %v2
   ret i64 %ret
@@ -924,25 +818,6 @@ define i64 @load_fold_sdiv3(i64* %p1, i6
 ; CHECK-O3-NEXT:    divl %ecx
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_sdiv3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq (%rsi), %rcx
-; CHECK-EX-NEXT:    movq %rax, %rdx
-; CHECK-EX-NEXT:    orq %rcx, %rdx
-; CHECK-EX-NEXT:    shrq $32, %rdx
-; CHECK-EX-NEXT:    je .LBB36_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    cqto
-; CHECK-EX-NEXT:    idivq %rcx
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB36_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %ecx
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax def $rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = sdiv i64 %v, %v2
@@ -960,22 +835,22 @@ define i64 @load_fold_udiv1(i64* %p) {
 ; CHECK-O0-NEXT:    divq %rsi
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: load_fold_udiv1:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
-; CHECK-O3-NEXT:    mulq %rcx
-; CHECK-O3-NEXT:    movq %rdx, %rax
-; CHECK-O3-NEXT:    shrq $3, %rax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_udiv1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-EX-NEXT:    mulq (%rdi)
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    shrq $3, %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: load_fold_udiv1:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
+; CHECK-O3-CUR-NEXT:    mulq %rcx
+; CHECK-O3-CUR-NEXT:    movq %rdx, %rax
+; CHECK-O3-CUR-NEXT:    shrq $3, %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_fold_udiv1:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
+; CHECK-O3-EX-NEXT:    mulq (%rdi)
+; CHECK-O3-EX-NEXT:    movq %rdx, %rax
+; CHECK-O3-EX-NEXT:    shrq $3, %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = udiv i64 %v, 15
   ret i64 %ret
@@ -1007,24 +882,6 @@ define i64 @load_fold_udiv2(i64* %p, i64
 ; CHECK-O3-NEXT:    divl %esi
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_udiv2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    orq %rsi, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    je .LBB38_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divq %rsi
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB38_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %esi
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax def $rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = udiv i64 %v, %v2
   ret i64 %ret
@@ -1057,25 +914,6 @@ define i64 @load_fold_udiv3(i64* %p1, i6
 ; CHECK-O3-NEXT:    divl %ecx
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_udiv3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq (%rsi), %rcx
-; CHECK-EX-NEXT:    movq %rax, %rdx
-; CHECK-EX-NEXT:    orq %rcx, %rdx
-; CHECK-EX-NEXT:    shrq $32, %rdx
-; CHECK-EX-NEXT:    je .LBB39_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divq %rcx
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB39_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %ecx
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax def $rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = udiv i64 %v, %v2
@@ -1109,23 +947,6 @@ define i64 @load_fold_srem1(i64* %p) {
 ; CHECK-O3-NEXT:    subq %rax, %rcx
 ; CHECK-O3-NEXT:    movq %rcx, %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_srem1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rcx
-; CHECK-EX-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-EX-NEXT:    movq %rcx, %rax
-; CHECK-EX-NEXT:    imulq %rdx
-; CHECK-EX-NEXT:    addq %rcx, %rdx
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    shrq $63, %rax
-; CHECK-EX-NEXT:    sarq $3, %rdx
-; CHECK-EX-NEXT:    addq %rax, %rdx
-; CHECK-EX-NEXT:    leaq (%rdx,%rdx,4), %rax
-; CHECK-EX-NEXT:    leaq (%rax,%rax,2), %rax
-; CHECK-EX-NEXT:    subq %rax, %rcx
-; CHECK-EX-NEXT:    movq %rcx, %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = srem i64 %v, 15
   ret i64 %ret
@@ -1159,25 +980,6 @@ define i64 @load_fold_srem2(i64* %p, i64
 ; CHECK-O3-NEXT:    divl %esi
 ; CHECK-O3-NEXT:    movl %edx, %eax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_srem2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    orq %rsi, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    je .LBB41_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    cqto
-; CHECK-EX-NEXT:    idivq %rsi
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB41_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %esi
-; CHECK-EX-NEXT:    movl %edx, %eax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = srem i64 %v, %v2
   ret i64 %ret
@@ -1211,26 +1013,6 @@ define i64 @load_fold_srem3(i64* %p1, i6
 ; CHECK-O3-NEXT:    divl %ecx
 ; CHECK-O3-NEXT:    movl %edx, %eax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_srem3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq (%rsi), %rcx
-; CHECK-EX-NEXT:    movq %rax, %rdx
-; CHECK-EX-NEXT:    orq %rcx, %rdx
-; CHECK-EX-NEXT:    shrq $32, %rdx
-; CHECK-EX-NEXT:    je .LBB42_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    cqto
-; CHECK-EX-NEXT:    idivq %rcx
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB42_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %ecx
-; CHECK-EX-NEXT:    movl %edx, %eax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = srem i64 %v, %v2
@@ -1261,19 +1043,6 @@ define i64 @load_fold_urem1(i64* %p) {
 ; CHECK-O3-NEXT:    subq %rax, %rcx
 ; CHECK-O3-NEXT:    movq %rcx, %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_urem1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rcx
-; CHECK-EX-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-EX-NEXT:    movq %rcx, %rax
-; CHECK-EX-NEXT:    mulq %rdx
-; CHECK-EX-NEXT:    shrq $3, %rdx
-; CHECK-EX-NEXT:    leaq (%rdx,%rdx,4), %rax
-; CHECK-EX-NEXT:    leaq (%rax,%rax,2), %rax
-; CHECK-EX-NEXT:    subq %rax, %rcx
-; CHECK-EX-NEXT:    movq %rcx, %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = urem i64 %v, 15
   ret i64 %ret
@@ -1308,25 +1077,6 @@ define i64 @load_fold_urem2(i64* %p, i64
 ; CHECK-O3-NEXT:    divl %esi
 ; CHECK-O3-NEXT:    movl %edx, %eax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_urem2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    orq %rsi, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    je .LBB44_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divq %rsi
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB44_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %esi
-; CHECK-EX-NEXT:    movl %edx, %eax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = urem i64 %v, %v2
   ret i64 %ret
@@ -1361,26 +1111,6 @@ define i64 @load_fold_urem3(i64* %p1, i6
 ; CHECK-O3-NEXT:    divl %ecx
 ; CHECK-O3-NEXT:    movl %edx, %eax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_urem3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq (%rsi), %rcx
-; CHECK-EX-NEXT:    movq %rax, %rdx
-; CHECK-EX-NEXT:    orq %rcx, %rdx
-; CHECK-EX-NEXT:    shrq $32, %rdx
-; CHECK-EX-NEXT:    je .LBB45_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divq %rcx
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB45_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %ecx
-; CHECK-EX-NEXT:    movl %edx, %eax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = urem i64 %v, %v2
@@ -1412,11 +1142,6 @@ define i64 @load_fold_shl2(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    shlxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_shl2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    shlxq %rsi, (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = shl i64 %v, %v2
   ret i64 %ret
@@ -1436,12 +1161,6 @@ define i64 @load_fold_shl3(i64* %p1, i64
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
 ; CHECK-O3-NEXT:    shlxq %rax, (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_shl3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rsi), %rax
-; CHECK-EX-NEXT:    shlxq %rax, (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = shl i64 %v, %v2
@@ -1473,11 +1192,6 @@ define i64 @load_fold_lshr2(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    shrxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_lshr2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    shrxq %rsi, (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = lshr i64 %v, %v2
   ret i64 %ret
@@ -1497,12 +1211,6 @@ define i64 @load_fold_lshr3(i64* %p1, i6
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
 ; CHECK-O3-NEXT:    shrxq %rax, (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_lshr3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rsi), %rax
-; CHECK-EX-NEXT:    shrxq %rax, (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = lshr i64 %v, %v2
@@ -1534,11 +1242,6 @@ define i64 @load_fold_ashr2(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    sarxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_ashr2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    sarxq %rsi, (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = ashr i64 %v, %v2
   ret i64 %ret
@@ -1558,12 +1261,6 @@ define i64 @load_fold_ashr3(i64* %p1, i6
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
 ; CHECK-O3-NEXT:    sarxq %rax, (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_ashr3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rsi), %rax
-; CHECK-EX-NEXT:    sarxq %rax, (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = ashr i64 %v, %v2
@@ -1583,12 +1280,6 @@ define i64 @load_fold_and1(i64* %p) {
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
 ; CHECK-O3-NEXT:    andl $15, %eax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_and1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    andl $15, %eax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = and i64 %v, 15
   ret i64 %ret
@@ -1606,12 +1297,6 @@ define i64 @load_fold_and2(i64* %p, i64
 ; CHECK-O3-NEXT:    movq %rsi, %rax
 ; CHECK-O3-NEXT:    andq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_and2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    andq (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = and i64 %v, %v2
   ret i64 %ret
@@ -1624,17 +1309,17 @@ define i64 @load_fold_and3(i64* %p1, i64
 ; CHECK-O0-NEXT:    andq (%rsi), %rax
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: load_fold_and3:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    andq (%rdi), %rax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_and3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    andq (%rsi), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: load_fold_and3:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rsi), %rax
+; CHECK-O3-CUR-NEXT:    andq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_fold_and3:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    andq (%rsi), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = and i64 %v, %v2
@@ -1665,12 +1350,6 @@ define i64 @load_fold_or2(i64* %p, i64 %
 ; CHECK-O3-NEXT:    movq %rsi, %rax
 ; CHECK-O3-NEXT:    orq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_or2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    orq (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = or i64 %v, %v2
   ret i64 %ret
@@ -1683,17 +1362,17 @@ define i64 @load_fold_or3(i64* %p1, i64*
 ; CHECK-O0-NEXT:    orq (%rsi), %rax
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: load_fold_or3:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    orq (%rdi), %rax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_or3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    orq (%rsi), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: load_fold_or3:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rsi), %rax
+; CHECK-O3-CUR-NEXT:    orq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_fold_or3:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    orq (%rsi), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = or i64 %v, %v2
@@ -1724,12 +1403,6 @@ define i64 @load_fold_xor2(i64* %p, i64
 ; CHECK-O3-NEXT:    movq %rsi, %rax
 ; CHECK-O3-NEXT:    xorq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_xor2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    xorq (%rdi), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = xor i64 %v, %v2
   ret i64 %ret
@@ -1742,17 +1415,17 @@ define i64 @load_fold_xor3(i64* %p1, i64
 ; CHECK-O0-NEXT:    xorq (%rsi), %rax
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: load_fold_xor3:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    xorq (%rdi), %rax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_xor3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    xorq (%rsi), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: load_fold_xor3:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rsi), %rax
+; CHECK-O3-CUR-NEXT:    xorq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_fold_xor3:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    xorq (%rsi), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = xor i64 %v, %v2
@@ -1774,12 +1447,6 @@ define i1 @load_fold_icmp1(i64* %p) {
 ; CHECK-O3-NEXT:    cmpq $15, (%rdi)
 ; CHECK-O3-NEXT:    sete %al
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_icmp1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    cmpq $15, (%rdi)
-; CHECK-EX-NEXT:    sete %al
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = icmp eq i64 %v, 15
   ret i1 %ret
@@ -1800,12 +1467,6 @@ define i1 @load_fold_icmp2(i64* %p, i64
 ; CHECK-O3-NEXT:    cmpq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    sete %al
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_icmp2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    cmpq %rsi, (%rdi)
-; CHECK-EX-NEXT:    sete %al
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = icmp eq i64 %v, %v2
   ret i1 %ret
@@ -1822,19 +1483,19 @@ define i1 @load_fold_icmp3(i64* %p1, i64
 ; CHECK-O0-NEXT:    movb %dl, %al
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: load_fold_icmp3:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    cmpq %rax, (%rdi)
-; CHECK-O3-NEXT:    sete %al
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_fold_icmp3:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    cmpq (%rsi), %rax
-; CHECK-EX-NEXT:    sete %al
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: load_fold_icmp3:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rsi), %rax
+; CHECK-O3-CUR-NEXT:    cmpq %rax, (%rdi)
+; CHECK-O3-CUR-NEXT:    sete %al
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_fold_icmp3:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    cmpq (%rsi), %rax
+; CHECK-O3-EX-NEXT:    sete %al
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
   %ret = icmp eq i64 %v, %v2
@@ -1860,11 +1521,6 @@ define void @rmw_fold_add1(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    addq $15, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_add1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    addq $15, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = add i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -1884,11 +1540,6 @@ define void @rmw_fold_add2(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    addq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_add2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    addq %rsi, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = add i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -1908,11 +1559,6 @@ define void @rmw_fold_sub1(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    addq $-15, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_sub1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    addq $-15, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = sub i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -1932,11 +1578,6 @@ define void @rmw_fold_sub2(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    subq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_sub2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    subq %rsi, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = sub i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -1972,12 +1613,6 @@ define void @rmw_fold_mul2(i64* %p, i64
 ; CHECK-O3-NEXT:    imulq (%rdi), %rsi
 ; CHECK-O3-NEXT:    movq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_mul2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    imulq (%rdi), %rsi
-; CHECK-EX-NEXT:    movq %rsi, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = mul i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2014,20 +1649,6 @@ define void @rmw_fold_sdiv1(i64* %p, i64
 ; CHECK-O3-NEXT:    addq %rax, %rdx
 ; CHECK-O3-NEXT:    movq %rdx, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_sdiv1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rcx
-; CHECK-EX-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-EX-NEXT:    movq %rcx, %rax
-; CHECK-EX-NEXT:    imulq %rdx
-; CHECK-EX-NEXT:    addq %rcx, %rdx
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    shrq $63, %rax
-; CHECK-EX-NEXT:    sarq $3, %rdx
-; CHECK-EX-NEXT:    addq %rax, %rdx
-; CHECK-EX-NEXT:    movq %rdx, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = sdiv i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2063,26 +1684,6 @@ define void @rmw_fold_sdiv2(i64* %p, i64
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
 ; CHECK-O3-NEXT:    movq %rax, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_sdiv2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    orq %rsi, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    je .LBB74_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    cqto
-; CHECK-EX-NEXT:    idivq %rsi
-; CHECK-EX-NEXT:    movq %rax, (%rdi)
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB74_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %esi
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax def $rax
-; CHECK-EX-NEXT:    movq %rax, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = sdiv i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2091,22 +1692,31 @@ define void @rmw_fold_sdiv2(i64* %p, i64
 
 ; Legal, as expected
 define void @rmw_fold_udiv1(i64* %p, i64 %v) {
-; CHECK-NOX-LABEL: rmw_fold_udiv1:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
-; CHECK-NOX-NEXT:    mulq %rcx
-; CHECK-NOX-NEXT:    shrq $3, %rdx
-; CHECK-NOX-NEXT:    movq %rdx, (%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_udiv1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-EX-NEXT:    mulq (%rdi)
-; CHECK-EX-NEXT:    shrq $3, %rdx
-; CHECK-EX-NEXT:    movq %rdx, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: rmw_fold_udiv1:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
+; CHECK-O0-NEXT:    mulq %rcx
+; CHECK-O0-NEXT:    shrq $3, %rdx
+; CHECK-O0-NEXT:    movq %rdx, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: rmw_fold_udiv1:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
+; CHECK-O3-CUR-NEXT:    mulq %rcx
+; CHECK-O3-CUR-NEXT:    shrq $3, %rdx
+; CHECK-O3-CUR-NEXT:    movq %rdx, (%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: rmw_fold_udiv1:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
+; CHECK-O3-EX-NEXT:    mulq (%rdi)
+; CHECK-O3-EX-NEXT:    shrq $3, %rdx
+; CHECK-O3-EX-NEXT:    movq %rdx, (%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = udiv i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2143,26 +1753,6 @@ define void @rmw_fold_udiv2(i64* %p, i64
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
 ; CHECK-O3-NEXT:    movq %rax, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_udiv2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    orq %rsi, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    je .LBB76_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divq %rsi
-; CHECK-EX-NEXT:    movq %rax, (%rdi)
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB76_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %esi
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax def $rax
-; CHECK-EX-NEXT:    movq %rax, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = udiv i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2205,23 +1795,6 @@ define void @rmw_fold_srem1(i64* %p, i64
 ; CHECK-O3-NEXT:    subq %rax, %rcx
 ; CHECK-O3-NEXT:    movq %rcx, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_srem1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rcx
-; CHECK-EX-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-EX-NEXT:    movq %rcx, %rax
-; CHECK-EX-NEXT:    imulq %rdx
-; CHECK-EX-NEXT:    addq %rcx, %rdx
-; CHECK-EX-NEXT:    movq %rdx, %rax
-; CHECK-EX-NEXT:    shrq $63, %rax
-; CHECK-EX-NEXT:    sarq $3, %rdx
-; CHECK-EX-NEXT:    addq %rax, %rdx
-; CHECK-EX-NEXT:    leaq (%rdx,%rdx,4), %rax
-; CHECK-EX-NEXT:    leaq (%rax,%rax,2), %rax
-; CHECK-EX-NEXT:    subq %rax, %rcx
-; CHECK-EX-NEXT:    movq %rcx, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = srem i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2257,26 +1830,6 @@ define void @rmw_fold_srem2(i64* %p, i64
 ; CHECK-O3-NEXT:    # kill: def $edx killed $edx def $rdx
 ; CHECK-O3-NEXT:    movq %rdx, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_srem2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    orq %rsi, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    je .LBB78_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    cqto
-; CHECK-EX-NEXT:    idivq %rsi
-; CHECK-EX-NEXT:    movq %rdx, (%rdi)
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB78_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %esi
-; CHECK-EX-NEXT:    # kill: def $edx killed $edx def $rdx
-; CHECK-EX-NEXT:    movq %rdx, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = srem i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2311,19 +1864,6 @@ define void @rmw_fold_urem1(i64* %p, i64
 ; CHECK-O3-NEXT:    subq %rax, %rcx
 ; CHECK-O3-NEXT:    movq %rcx, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_urem1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rcx
-; CHECK-EX-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-EX-NEXT:    movq %rcx, %rax
-; CHECK-EX-NEXT:    mulq %rdx
-; CHECK-EX-NEXT:    shrq $3, %rdx
-; CHECK-EX-NEXT:    leaq (%rdx,%rdx,4), %rax
-; CHECK-EX-NEXT:    leaq (%rax,%rax,2), %rax
-; CHECK-EX-NEXT:    subq %rax, %rcx
-; CHECK-EX-NEXT:    movq %rcx, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = urem i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2360,26 +1900,6 @@ define void @rmw_fold_urem2(i64* %p, i64
 ; CHECK-O3-NEXT:    # kill: def $edx killed $edx def $rdx
 ; CHECK-O3-NEXT:    movq %rdx, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_urem2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    orq %rsi, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    je .LBB80_1
-; CHECK-EX-NEXT:  # %bb.2:
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divq %rsi
-; CHECK-EX-NEXT:    movq %rdx, (%rdi)
-; CHECK-EX-NEXT:    retq
-; CHECK-EX-NEXT:  .LBB80_1:
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    xorl %edx, %edx
-; CHECK-EX-NEXT:    divl %esi
-; CHECK-EX-NEXT:    # kill: def $edx killed $edx def $rdx
-; CHECK-EX-NEXT:    movq %rdx, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = urem i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2388,17 +1908,24 @@ define void @rmw_fold_urem2(i64* %p, i64
 
 ; Legal to fold (TODO)
 define void @rmw_fold_shl1(i64* %p, i64 %v) {
-; CHECK-NOX-LABEL: rmw_fold_shl1:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    shlq $15, %rax
-; CHECK-NOX-NEXT:    movq %rax, (%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_shl1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    shlq $15, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: rmw_fold_shl1:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    shlq $15, %rax
+; CHECK-O0-NEXT:    movq %rax, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: rmw_fold_shl1:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    shlq $15, %rax
+; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: rmw_fold_shl1:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    shlq $15, (%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = shl i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2417,18 +1944,18 @@ define void @rmw_fold_shl2(i64* %p, i64
 ; CHECK-O0-NEXT:    movq %rax, (%rdi)
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: rmw_fold_shl2:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    shlxq %rsi, (%rdi), %rax
-; CHECK-O3-NEXT:    movq %rax, (%rdi)
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_shl2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rcx
-; CHECK-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
-; CHECK-EX-NEXT:    shlq %cl, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: rmw_fold_shl2:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    shlxq %rsi, (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: rmw_fold_shl2:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq %rsi, %rcx
+; CHECK-O3-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-O3-EX-NEXT:    shlq %cl, (%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = shl i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2437,17 +1964,24 @@ define void @rmw_fold_shl2(i64* %p, i64
 
 ; Legal to fold (TODO)
 define void @rmw_fold_lshr1(i64* %p, i64 %v) {
-; CHECK-NOX-LABEL: rmw_fold_lshr1:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    shrq $15, %rax
-; CHECK-NOX-NEXT:    movq %rax, (%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_lshr1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    shrq $15, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: rmw_fold_lshr1:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    shrq $15, %rax
+; CHECK-O0-NEXT:    movq %rax, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: rmw_fold_lshr1:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    shrq $15, %rax
+; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: rmw_fold_lshr1:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    shrq $15, (%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = lshr i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2466,18 +2000,18 @@ define void @rmw_fold_lshr2(i64* %p, i64
 ; CHECK-O0-NEXT:    movq %rax, (%rdi)
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: rmw_fold_lshr2:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    shrxq %rsi, (%rdi), %rax
-; CHECK-O3-NEXT:    movq %rax, (%rdi)
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_lshr2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rcx
-; CHECK-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
-; CHECK-EX-NEXT:    shrq %cl, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: rmw_fold_lshr2:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    shrxq %rsi, (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: rmw_fold_lshr2:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq %rsi, %rcx
+; CHECK-O3-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-O3-EX-NEXT:    shrq %cl, (%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = lshr i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2486,17 +2020,24 @@ define void @rmw_fold_lshr2(i64* %p, i64
 
 ; Legal to fold (TODO)
 define void @rmw_fold_ashr1(i64* %p, i64 %v) {
-; CHECK-NOX-LABEL: rmw_fold_ashr1:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    sarq $15, %rax
-; CHECK-NOX-NEXT:    movq %rax, (%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_ashr1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    sarq $15, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: rmw_fold_ashr1:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    sarq $15, %rax
+; CHECK-O0-NEXT:    movq %rax, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: rmw_fold_ashr1:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    sarq $15, %rax
+; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: rmw_fold_ashr1:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    sarq $15, (%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = ashr i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2515,18 +2056,18 @@ define void @rmw_fold_ashr2(i64* %p, i64
 ; CHECK-O0-NEXT:    movq %rax, (%rdi)
 ; CHECK-O0-NEXT:    retq
 ;
-; CHECK-O3-LABEL: rmw_fold_ashr2:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    sarxq %rsi, (%rdi), %rax
-; CHECK-O3-NEXT:    movq %rax, (%rdi)
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_ashr2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rcx
-; CHECK-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
-; CHECK-EX-NEXT:    sarq %cl, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-O3-CUR-LABEL: rmw_fold_ashr2:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    sarxq %rsi, (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: rmw_fold_ashr2:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq %rsi, %rcx
+; CHECK-O3-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-O3-EX-NEXT:    sarq %cl, (%rdi)
+; CHECK-O3-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = ashr i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2548,11 +2089,6 @@ define void @rmw_fold_and1(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    andq $15, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_and1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    andq $15, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = and i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2572,11 +2108,6 @@ define void @rmw_fold_and2(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    andq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_and2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    andq %rsi, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = and i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2596,11 +2127,6 @@ define void @rmw_fold_or1(i64* %p, i64 %
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    orq $15, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_or1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    orq $15, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = or i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2620,11 +2146,6 @@ define void @rmw_fold_or2(i64* %p, i64 %
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    orq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_or2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    orq %rsi, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = or i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2644,11 +2165,6 @@ define void @rmw_fold_xor1(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    xorq $15, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_xor1:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    xorq $15, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = xor i64 %prev, 15
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2668,11 +2184,6 @@ define void @rmw_fold_xor2(i64* %p, i64
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    xorq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: rmw_fold_xor2:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    xorq %rsi, (%rdi)
-; CHECK-EX-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = xor i64 %prev, %v
   store atomic i64 %val, i64* %p unordered, align 8
@@ -2709,13 +2220,6 @@ define i32 @fold_trunc_add(i64* %p, i32
 ; CHECK-O3-NEXT:    addl %esi, %eax
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_trunc_add:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    addl %esi, %eax
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %trunc = trunc i64 %v to i32
   %ret = add i32 %trunc, %v2
@@ -2737,13 +2241,6 @@ define i32 @fold_trunc_and(i64* %p, i32
 ; CHECK-O3-NEXT:    andl %esi, %eax
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_trunc_and:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    andl %esi, %eax
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %trunc = trunc i64 %v to i32
   %ret = and i32 %trunc, %v2
@@ -2765,13 +2262,6 @@ define i32 @fold_trunc_or(i64* %p, i32 %
 ; CHECK-O3-NEXT:    orl %esi, %eax
 ; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_trunc_or:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    orl %esi, %eax
-; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %trunc = trunc i64 %v to i32
   %ret = or i32 %trunc, %v2
@@ -2799,15 +2289,6 @@ define i32 @split_load(i64* %p) {
 ; CHECK-O3-NEXT:    orl %eax, %ecx
 ; CHECK-O3-NEXT:    movzbl %cl, %eax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: split_load:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    movq %rax, %rcx
-; CHECK-EX-NEXT:    shrq $32, %rcx
-; CHECK-EX-NEXT:    orl %eax, %ecx
-; CHECK-EX-NEXT:    movzbl %cl, %eax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %b1 = trunc i64 %v to i8
   %v.shift = lshr i64 %v, 32
@@ -2834,16 +2315,22 @@ define i64 @constant_folding(i64* %p) {
 
 ; Legal to forward and fold (TODO)
 define i64 @load_forwarding(i64* %p) {
-; CHECK-NOX-LABEL: load_forwarding:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    orq (%rdi), %rax
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_forwarding:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq (%rdi), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: load_forwarding:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    orq (%rdi), %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: load_forwarding:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    orq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_forwarding:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %v2 = load atomic i64, i64* %p unordered, align 8
   %ret = or i64 %v, %v2
@@ -2949,31 +2436,32 @@ define i64 @fold_constant(i64 %arg) {
 ; CHECK-O3-NEXT:    movq %rdi, %rax
 ; CHECK-O3-NEXT:    addq {{.*}}(%rip), %rax
 ; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_constant:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rdi, %rax
-; CHECK-EX-NEXT:    addq {{.*}}(%rip), %rax
-; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* @Constant unordered, align 8
   %ret = add i64 %v, %arg
   ret i64 %ret
 }
 
 define i64 @fold_constant_clobber(i64* %p, i64 %arg) {
-; CHECK-NOX-LABEL: fold_constant_clobber:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq {{.*}}(%rip), %rax
-; CHECK-NOX-NEXT:    movq $5, (%rdi)
-; CHECK-NOX-NEXT:    addq %rsi, %rax
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_constant_clobber:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    movq $5, (%rdi)
-; CHECK-EX-NEXT:    addq {{.*}}(%rip), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: fold_constant_clobber:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-O0-NEXT:    movq $5, (%rdi)
+; CHECK-O0-NEXT:    addq %rsi, %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: fold_constant_clobber:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-O3-CUR-NEXT:    movq $5, (%rdi)
+; CHECK-O3-CUR-NEXT:    addq %rsi, %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: fold_constant_clobber:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq %rsi, %rax
+; CHECK-O3-EX-NEXT:    movq $5, (%rdi)
+; CHECK-O3-EX-NEXT:    addq {{.*}}(%rip), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* @Constant unordered, align 8
   store i64 5, i64* %p
   %ret = add i64 %v, %arg
@@ -2981,19 +2469,26 @@ define i64 @fold_constant_clobber(i64* %
 }
 
 define i64 @fold_constant_fence(i64 %arg) {
-; CHECK-NOX-LABEL: fold_constant_fence:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq {{.*}}(%rip), %rax
-; CHECK-NOX-NEXT:    mfence
-; CHECK-NOX-NEXT:    addq %rdi, %rax
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_constant_fence:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rdi, %rax
-; CHECK-EX-NEXT:    mfence
-; CHECK-EX-NEXT:    addq {{.*}}(%rip), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: fold_constant_fence:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-O0-NEXT:    mfence
+; CHECK-O0-NEXT:    addq %rdi, %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: fold_constant_fence:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-O3-CUR-NEXT:    mfence
+; CHECK-O3-CUR-NEXT:    addq %rdi, %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: fold_constant_fence:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq %rdi, %rax
+; CHECK-O3-EX-NEXT:    mfence
+; CHECK-O3-EX-NEXT:    addq {{.*}}(%rip), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* @Constant unordered, align 8
   fence seq_cst
   %ret = add i64 %v, %arg
@@ -3001,19 +2496,26 @@ define i64 @fold_constant_fence(i64 %arg
 }
 
 define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) {
-; CHECK-NOX-LABEL: fold_invariant_clobber:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    movq $5, (%rdi)
-; CHECK-NOX-NEXT:    addq %rsi, %rax
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_invariant_clobber:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    movq $5, (%rdi)
-; CHECK-EX-NEXT:    addq (%rdi), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: fold_invariant_clobber:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    movq $5, (%rdi)
+; CHECK-O0-NEXT:    addq %rsi, %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: fold_invariant_clobber:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    movq $5, (%rdi)
+; CHECK-O3-CUR-NEXT:    addq %rsi, %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: fold_invariant_clobber:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq %rsi, %rax
+; CHECK-O3-EX-NEXT:    movq $5, (%rdi)
+; CHECK-O3-EX-NEXT:    addq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
   store i64 5, i64* %p
   %ret = add i64 %v, %arg
@@ -3022,19 +2524,26 @@ define i64 @fold_invariant_clobber(i64*
 
 
 define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) {
-; CHECK-NOX-LABEL: fold_invariant_fence:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    mfence
-; CHECK-NOX-NEXT:    addq %rsi, %rax
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_invariant_fence:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    mfence
-; CHECK-EX-NEXT:    addq (%rdi), %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-LABEL: fold_invariant_fence:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    mfence
+; CHECK-O0-NEXT:    addq %rsi, %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: fold_invariant_fence:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
+; CHECK-O3-CUR-NEXT:    mfence
+; CHECK-O3-CUR-NEXT:    addq %rsi, %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: fold_invariant_fence:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    movq %rsi, %rax
+; CHECK-O3-EX-NEXT:    mfence
+; CHECK-O3-EX-NEXT:    addq (%rdi), %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
   fence seq_cst
   %ret = add i64 %v, %arg
@@ -3045,26 +2554,33 @@ define i64 @fold_invariant_fence(i64* de
 ; Exercise a few cases involving any extend idioms
 
 define i16 @load_i8_anyext_i16(i8* %ptr) {
-; CHECK-O0-LABEL: load_i8_anyext_i16:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movb (%rdi), %al
-; CHECK-O0-NEXT:    movzbl %al, %ecx
-; CHECK-O0-NEXT:    # kill: def $cx killed $cx killed $ecx
-; CHECK-O0-NEXT:    movw %cx, %ax
-; CHECK-O0-NEXT:    retq
-;
-; CHECK-O3-LABEL: load_i8_anyext_i16:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movzbl (%rdi), %eax
-; CHECK-O3-NEXT:    # kill: def $ax killed $ax killed $eax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_i8_anyext_i16:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
-; CHECK-EX-NEXT:    vmovd %xmm0, %eax
-; CHECK-EX-NEXT:    # kill: def $ax killed $ax killed $eax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-CUR-LABEL: load_i8_anyext_i16:
+; CHECK-O0-CUR:       # %bb.0:
+; CHECK-O0-CUR-NEXT:    movb (%rdi), %al
+; CHECK-O0-CUR-NEXT:    movzbl %al, %ecx
+; CHECK-O0-CUR-NEXT:    # kill: def $cx killed $cx killed $ecx
+; CHECK-O0-CUR-NEXT:    movw %cx, %ax
+; CHECK-O0-CUR-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: load_i8_anyext_i16:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movzbl (%rdi), %eax
+; CHECK-O3-CUR-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O0-EX-LABEL: load_i8_anyext_i16:
+; CHECK-O0-EX:       # %bb.0:
+; CHECK-O0-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, %eax
+; CHECK-O0-EX-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-O0-EX-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_i8_anyext_i16:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
+; CHECK-O3-EX-NEXT:    vmovd %xmm0, %eax
+; CHECK-O3-EX-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i8, i8* %ptr unordered, align 2
   %vec = insertelement <2 x i8> undef, i8 %v, i32 0
   %res = bitcast <2 x i8> %vec to i16
@@ -3072,22 +2588,28 @@ define i16 @load_i8_anyext_i16(i8* %ptr)
 }
 
 define i32 @load_i8_anyext_i32(i8* %ptr) {
-; CHECK-O0-LABEL: load_i8_anyext_i32:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movb (%rdi), %al
-; CHECK-O0-NEXT:    movzbl %al, %eax
-; CHECK-O0-NEXT:    retq
-;
-; CHECK-O3-LABEL: load_i8_anyext_i32:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movzbl (%rdi), %eax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_i8_anyext_i32:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
-; CHECK-EX-NEXT:    vmovd %xmm0, %eax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-CUR-LABEL: load_i8_anyext_i32:
+; CHECK-O0-CUR:       # %bb.0:
+; CHECK-O0-CUR-NEXT:    movb (%rdi), %al
+; CHECK-O0-CUR-NEXT:    movzbl %al, %eax
+; CHECK-O0-CUR-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: load_i8_anyext_i32:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movzbl (%rdi), %eax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O0-EX-LABEL: load_i8_anyext_i32:
+; CHECK-O0-EX:       # %bb.0:
+; CHECK-O0-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, %eax
+; CHECK-O0-EX-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_i8_anyext_i32:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
+; CHECK-O3-EX-NEXT:    vmovd %xmm0, %eax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i8, i8* %ptr unordered, align 4
   %vec = insertelement <4 x i8> undef, i8 %v, i32 0
   %res = bitcast <4 x i8> %vec to i32
@@ -3095,24 +2617,30 @@ define i32 @load_i8_anyext_i32(i8* %ptr)
 }
 
 define i32 @load_i16_anyext_i32(i16* %ptr) {
-; CHECK-O0-LABEL: load_i16_anyext_i32:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movw (%rdi), %ax
-; CHECK-O0-NEXT:    # implicit-def: $ecx
-; CHECK-O0-NEXT:    movw %ax, %cx
-; CHECK-O0-NEXT:    movl %ecx, %eax
-; CHECK-O0-NEXT:    retq
-;
-; CHECK-O3-LABEL: load_i16_anyext_i32:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movzwl (%rdi), %eax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_i16_anyext_i32:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
-; CHECK-EX-NEXT:    vmovd %xmm0, %eax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-CUR-LABEL: load_i16_anyext_i32:
+; CHECK-O0-CUR:       # %bb.0:
+; CHECK-O0-CUR-NEXT:    movw (%rdi), %ax
+; CHECK-O0-CUR-NEXT:    # implicit-def: $ecx
+; CHECK-O0-CUR-NEXT:    movw %ax, %cx
+; CHECK-O0-CUR-NEXT:    movl %ecx, %eax
+; CHECK-O0-CUR-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: load_i16_anyext_i32:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movzwl (%rdi), %eax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O0-EX-LABEL: load_i16_anyext_i32:
+; CHECK-O0-EX:       # %bb.0:
+; CHECK-O0-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
+; CHECK-O0-EX-NEXT:    vmovd %xmm0, %eax
+; CHECK-O0-EX-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_i16_anyext_i32:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
+; CHECK-O3-EX-NEXT:    vmovd %xmm0, %eax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i16, i16* %ptr unordered, align 4
   %vec = insertelement <2 x i16> undef, i16 %v, i64 0
   %res = bitcast <2 x i16> %vec to i32
@@ -3120,27 +2648,33 @@ define i32 @load_i16_anyext_i32(i16* %pt
 }
 
 define i64 @load_i16_anyext_i64(i16* %ptr) {
-; CHECK-O0-LABEL: load_i16_anyext_i64:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movw (%rdi), %ax
-; CHECK-O0-NEXT:    # implicit-def: $ecx
-; CHECK-O0-NEXT:    movw %ax, %cx
-; CHECK-O0-NEXT:    vmovd %ecx, %xmm0
-; CHECK-O0-NEXT:    vmovq %xmm0, %rax
-; CHECK-O0-NEXT:    retq
-;
-; CHECK-O3-LABEL: load_i16_anyext_i64:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movzwl (%rdi), %eax
-; CHECK-O3-NEXT:    vmovd %eax, %xmm0
-; CHECK-O3-NEXT:    vmovq %xmm0, %rax
-; CHECK-O3-NEXT:    retq
-;
-; CHECK-EX-LABEL: load_i16_anyext_i64:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
-; CHECK-EX-NEXT:    vmovq %xmm0, %rax
-; CHECK-EX-NEXT:    retq
+; CHECK-O0-CUR-LABEL: load_i16_anyext_i64:
+; CHECK-O0-CUR:       # %bb.0:
+; CHECK-O0-CUR-NEXT:    movw (%rdi), %ax
+; CHECK-O0-CUR-NEXT:    # implicit-def: $ecx
+; CHECK-O0-CUR-NEXT:    movw %ax, %cx
+; CHECK-O0-CUR-NEXT:    vmovd %ecx, %xmm0
+; CHECK-O0-CUR-NEXT:    vmovq %xmm0, %rax
+; CHECK-O0-CUR-NEXT:    retq
+;
+; CHECK-O3-CUR-LABEL: load_i16_anyext_i64:
+; CHECK-O3-CUR:       # %bb.0:
+; CHECK-O3-CUR-NEXT:    movzwl (%rdi), %eax
+; CHECK-O3-CUR-NEXT:    vmovd %eax, %xmm0
+; CHECK-O3-CUR-NEXT:    vmovq %xmm0, %rax
+; CHECK-O3-CUR-NEXT:    retq
+;
+; CHECK-O0-EX-LABEL: load_i16_anyext_i64:
+; CHECK-O0-EX:       # %bb.0:
+; CHECK-O0-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
+; CHECK-O0-EX-NEXT:    vmovq %xmm0, %rax
+; CHECK-O0-EX-NEXT:    retq
+;
+; CHECK-O3-EX-LABEL: load_i16_anyext_i64:
+; CHECK-O3-EX:       # %bb.0:
+; CHECK-O3-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
+; CHECK-O3-EX-NEXT:    vmovq %xmm0, %rax
+; CHECK-O3-EX-NEXT:    retq
   %v = load atomic i16, i16* %ptr unordered, align 8
   %vec = insertelement <4 x i16> undef, i16 %v, i64 0
   %res = bitcast <4 x i16> %vec to i64
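
To make the CUR/EX split above concrete: every rmw_fold_* function loads an i64 with
unordered atomic semantics, applies one ALU operation, and stores the result back
unordered to the same address. Below is a minimal sketch of that pattern, not the
in-tree test itself (the file and function names here are illustrative), which can be
fed directly to llc to compare the two lowerings:

  ; atomic-rmw-sketch.ll (hypothetical reduction of the rmw_fold_* pattern)
  ; Both the load and the store are unordered atomics, so folding the whole
  ; sequence into one memory-destination instruction is legal.
  define void @rmw_fold_ashr1_sketch(i64* %p) {
    %prev = load atomic i64, i64* %p unordered, align 8   ; unordered atomic load
    %val = ashr i64 %prev, 15                             ; arithmetic shift right
    store atomic i64 %val, i64* %p unordered, align 8     ; unordered atomic store
    ret void
  }

As the check lines for rmw_fold_ashr1 show, the current -O3 lowering (CHECK-O3-CUR)
keeps the three steps separate (movq / sarq / movq), while the experimental isel run
(CHECK-O3-EX) folds them into a single sarq $15, (%rdi); the "Legal to fold (TODO)"
comment tracks exactly that missed fold on the default path. The same grouping
convention holds file-wide: functions where every run line agrees keep the shared
CHECK-O0/CHECK-O3 prefixes, and the mode-specific -CUR/-EX prefixes appear only where
the output actually diverges.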



