[llvm] fae1ffc - [X86] Regenerate xop tests with common prefixes.

Tue Oct 27 10:01:42 PDT 2020

Author: Simon Pilgrim
Date: 2020-10-27T16:45:46Z
New Revision: fae1ffceaea191d0d432b5e8e7e44725f0d70f80

URL: https://github.com/llvm/llvm-project/commit/fae1ffceaea191d0d432b5e8e7e44725f0d70f80
DIFF: https://github.com/llvm/llvm-project/commit/fae1ffceaea191d0d432b5e8e7e44725f0d70f80.diff

LOG: [X86] Regenerate xop tests with common prefixes.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/commute-xop.ll
    llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/xop-mask-comments.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/commute-xop.ll b/llvm/test/CodeGen/X86/commute-xop.ll
index 789afbb99bc6..80f721596580 100644

--- a/llvm/test/CodeGen/X86/commute-xop.ll
+++ b/llvm/test/CodeGen/X86/commute-xop.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,X64
 
 define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
-; X32-LABEL: commute_fold_vpcomb:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomgtb (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpcomb:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpcomgtb (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomb:
 ; X64:       # %bb.0:
@@ -20,11 +20,11 @@ define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
 declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
 
 define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
-; X32-LABEL: commute_fold_vpcomd:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomged (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpcomd:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpcomged (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomd:
 ; X64:       # %bb.0:
@@ -37,11 +37,11 @@ define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
 declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
 
 define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
-; X32-LABEL: commute_fold_vpcomq:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomltq (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpcomq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpcomltq (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomq:
 ; X64:       # %bb.0:
@@ -54,11 +54,11 @@ define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
 declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
 
 define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
-; X32-LABEL: commute_fold_vpcomub:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomleub (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpcomub:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpcomleub (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomub:
 ; X64:       # %bb.0:
@@ -71,11 +71,11 @@ define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
 declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
 
 define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
-; X32-LABEL: commute_fold_vpcomud:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomeqd (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpcomud:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpcomeqd (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomud:
 ; X64:       # %bb.0:
@@ -88,11 +88,11 @@ define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
 declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
 
 define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
-; X32-LABEL: commute_fold_vpcomuq:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpcomneqq (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpcomuq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpcomneqq (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpcomuq:
 ; X64:       # %bb.0:
@@ -105,15 +105,10 @@ define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
 declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
 
 define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
-; X32-LABEL: commute_fold_vpcomuw:
-; X32:       # %bb.0:
-; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: commute_fold_vpcomuw:
-; X64:       # %bb.0:
-; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; X64-NEXT:    retq
+; CHECK-LABEL: commute_fold_vpcomuw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
   ret <8 x i16> %2
@@ -121,15 +116,10 @@ define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
 
 define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
-; X32-LABEL: commute_fold_vpcomw:
-; X32:       # %bb.0:
-; X32-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: commute_fold_vpcomw:
-; X64:       # %bb.0:
-; X64-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-NEXT:    retq
+; CHECK-LABEL: commute_fold_vpcomw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
   ret <8 x i16> %2
@@ -137,11 +127,11 @@ define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
 
 define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
-; X32-LABEL: commute_fold_vpmacsdd:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacsdd %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacsdd:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacsdd %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacsdd:
 ; X64:       # %bb.0:
@@ -154,11 +144,11 @@ define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32>
 declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
-; X32-LABEL: commute_fold_vpmacsdqh:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacsdqh %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacsdqh:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacsdqh %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacsdqh:
 ; X64:       # %bb.0:
@@ -171,11 +161,11 @@ define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64
 declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
-; X32-LABEL: commute_fold_vpmacsdql:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacsdql %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacsdql:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacsdql %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacsdql:
 ; X64:       # %bb.0:
@@ -188,11 +178,11 @@ define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64
 declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
 
 define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
-; X32-LABEL: commute_fold_vpmacssdd:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacssdd %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacssdd:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacssdd %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacssdd:
 ; X64:       # %bb.0:
@@ -205,11 +195,11 @@ define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32
 declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
-; X32-LABEL: commute_fold_vpmacssdqh:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacssdqh %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacssdqh:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacssdqh %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacssdqh:
 ; X64:       # %bb.0:
@@ -222,11 +212,11 @@ define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i6
 declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
-; X32-LABEL: commute_fold_vpmacssdql:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacssdql %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacssdql:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacssdql %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacssdql:
 ; X64:       # %bb.0:
@@ -239,11 +229,11 @@ define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i6
 declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
 
 define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
-; X32-LABEL: commute_fold_vpmacsswd:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacsswd %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacsswd:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacsswd %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacsswd:
 ; X64:       # %bb.0:
@@ -256,11 +246,11 @@ define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32
 declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
 
 define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
-; X32-LABEL: commute_fold_vpmacssww:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacssww %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacssww:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacssww %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacssww:
 ; X64:       # %bb.0:
@@ -273,11 +263,11 @@ define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16
 declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
 
 define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
-; X32-LABEL: commute_fold_vpmacswd:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacswd %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacswd:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacswd %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacswd:
 ; X64:       # %bb.0:
@@ -290,11 +280,11 @@ define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32>
 declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
 
 define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
-; X32-LABEL: commute_fold_vpmacsww:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmacsww %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmacsww:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmacsww %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmacsww:
 ; X64:       # %bb.0:
@@ -307,11 +297,11 @@ define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16>
 declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
 
 define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
-; X32-LABEL: commute_fold_vpmadcsswd:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmadcsswd %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmadcsswd:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmadcsswd %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmadcsswd:
 ; X64:       # %bb.0:
@@ -324,11 +314,11 @@ define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i3
 declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
 
 define <4 x i32> @commute_fold_vpmadcswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
-; X32-LABEL: commute_fold_vpmadcswd:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmadcswd %xmm1, (%eax), %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: commute_fold_vpmadcswd:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpmadcswd %xmm1, (%eax), %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: commute_fold_vpmadcswd:
 ; X64:       # %bb.0:

diff  --git a/llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
index 037c8f123aad..b1771e8a3b85 100644
--- a/llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X32
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefixes=CHECK,X64
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/xop-builtins.c
 
 define <2 x i64> @test_mm_maccs_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_maccs_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_maccs_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
@@ -19,10 +19,10 @@ define <2 x i64> @test_mm_maccs_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a
 declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_macc_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_macc_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_macc_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
@@ -33,10 +33,10 @@ define <2 x i64> @test_mm_macc_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2
 declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_maccsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_maccsd_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_maccsd_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -47,10 +47,10 @@ define <2 x i64> @test_mm_maccsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %
 declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_maccd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_maccd_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_maccd_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -61,10 +61,10 @@ define <2 x i64> @test_mm_maccd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a
 declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_maccs_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_maccs_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_maccs_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -75,10 +75,10 @@ define <2 x i64> @test_mm_maccs_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a
 declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_macc_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_macc_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_macc_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -89,10 +89,10 @@ define <2 x i64> @test_mm_macc_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2
 declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_maccslo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_maccslo_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_maccslo_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
@@ -101,10 +101,10 @@ define <2 x i64> @test_mm_maccslo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>
 declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @test_mm_macclo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_macclo_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_macclo_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
@@ -113,10 +113,10 @@ define <2 x i64> @test_mm_macclo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %
 declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @test_mm_maccshi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_maccshi_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_maccshi_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
@@ -125,10 +125,10 @@ define <2 x i64> @test_mm_maccshi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>
 declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @test_mm_macchi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_macchi_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_macchi_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
@@ -137,10 +137,10 @@ define <2 x i64> @test_mm_macchi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %
 declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @test_mm_maddsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_maddsd_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_maddsd_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -151,10 +151,10 @@ define <2 x i64> @test_mm_maddsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %
 declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_maddd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
-; ALL-LABEL: test_mm_maddd_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_maddd_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -165,10 +165,10 @@ define <2 x i64> @test_mm_maddd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a
 declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_haddw_epi8(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddw_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddbw %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddw_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddbw %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %arg0)
   %bc = bitcast <8 x i16> %res to <2 x i64>
@@ -177,10 +177,10 @@ define <2 x i64> @test_mm_haddw_epi8(<2 x i64> %a0) {
 declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_haddd_epi8(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddd_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddbd %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddd_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddbd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %arg0)
   %bc = bitcast <4 x i32> %res to <2 x i64>
@@ -189,10 +189,10 @@ define <2 x i64> @test_mm_haddd_epi8(<2 x i64> %a0) {
 declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_haddq_epi8(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddq_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddbq %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddq_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddbq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %arg0)
   ret <2 x i64> %res
@@ -200,10 +200,10 @@ define <2 x i64> @test_mm_haddq_epi8(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_haddd_epi16(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddd_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddwd %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddd_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddwd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %arg0)
   %bc = bitcast <4 x i32> %res to <2 x i64>
@@ -212,10 +212,10 @@ define <2 x i64> @test_mm_haddd_epi16(<2 x i64> %a0) {
 declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_haddq_epi16(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddq_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddwq %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddq_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddwq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %arg0)
   ret <2 x i64> %res
@@ -223,10 +223,10 @@ define <2 x i64> @test_mm_haddq_epi16(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_haddq_epi32(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddq_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphadddq %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddq_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphadddq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %arg0)
   ret <2 x i64> %res
@@ -234,10 +234,10 @@ define <2 x i64> @test_mm_haddq_epi32(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_haddw_epu8(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddw_epu8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddubw %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddw_epu8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddubw %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %arg0)
   %bc = bitcast <8 x i16> %res to <2 x i64>
@@ -246,10 +246,10 @@ define <2 x i64> @test_mm_haddw_epu8(<2 x i64> %a0) {
 declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_haddd_epu8(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddd_epu8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddubd %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddd_epu8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddubd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %arg0)
   %bc = bitcast <4 x i32> %res to <2 x i64>
@@ -258,10 +258,10 @@ define <2 x i64> @test_mm_haddd_epu8(<2 x i64> %a0) {
 declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_haddq_epu8(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddq_epu8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddubq %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddq_epu8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddubq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %arg0)
   ret <2 x i64> %res
@@ -269,10 +269,10 @@ define <2 x i64> @test_mm_haddq_epu8(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_haddd_epu16(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddd_epu16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphadduwd %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddd_epu16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphadduwd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %arg0)
   %bc = bitcast <4 x i32> %res to <2 x i64>
@@ -282,10 +282,10 @@ declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
 
 
 define <2 x i64> @test_mm_haddq_epu16(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddq_epu16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphadduwq %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddq_epu16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphadduwq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %arg0)
   ret <2 x i64> %res
@@ -293,10 +293,10 @@ define <2 x i64> @test_mm_haddq_epu16(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_haddq_epu32(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_haddq_epu32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphaddudq %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_haddq_epu32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphaddudq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %arg0)
   ret <2 x i64> %res
@@ -304,10 +304,10 @@ define <2 x i64> @test_mm_haddq_epu32(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_hsubw_epi8(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_hsubw_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphsubbw %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_hsubw_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphsubbw %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %arg0)
   %bc = bitcast <8 x i16> %res to <2 x i64>
@@ -316,10 +316,10 @@ define <2 x i64> @test_mm_hsubw_epi8(<2 x i64> %a0) {
 declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_hsubd_epi16(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_hsubd_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphsubwd %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_hsubd_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphsubwd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %arg0)
   %bc = bitcast <4 x i32> %res to <2 x i64>
@@ -328,10 +328,10 @@ define <2 x i64> @test_mm_hsubd_epi16(<2 x i64> %a0) {
 declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_hsubq_epi32(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_hsubq_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vphsubdq %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_hsubq_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vphsubdq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %arg0)
   ret <2 x i64> %res
@@ -339,39 +339,39 @@ define <2 x i64> @test_mm_hsubq_epi32(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_cmov_si128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
-; ALL-LABEL: test_mm_cmov_si128:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; ALL-NEXT:    vpxor %xmm3, %xmm2, %xmm3
-; ALL-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; ALL-NEXT:    vpand %xmm3, %xmm1, %xmm1
-; ALL-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_cmov_si128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; CHECK-NEXT:    vpxor %xmm3, %xmm2, %xmm3
+; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm3, %xmm1, %xmm1
+; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
   ret <2 x i64> %res
 }
 declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
 
 define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
-; ALL-LABEL: test_mm256_cmov_si256:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; ALL-NEXT:    vcmptrueps %ymm3, %ymm3, %ymm3
-; ALL-NEXT:    vxorps %ymm3, %ymm2, %ymm3
-; ALL-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; ALL-NEXT:    vandps %ymm3, %ymm1, %ymm1
-; ALL-NEXT:    vorps %ymm1, %ymm0, %ymm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm256_cmov_si256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; CHECK-NEXT:    vcmptrueps %ymm3, %ymm3, %ymm3
+; CHECK-NEXT:    vxorps %ymm3, %ymm2, %ymm3
+; CHECK-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vandps %ymm3, %ymm1, %ymm1
+; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
   ret <4 x i64> %res
 }
 declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
 
 define <2 x i64> @test_mm_perm_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
-; ALL-LABEL: test_mm_perm_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_perm_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
@@ -382,10 +382,10 @@ define <2 x i64> @test_mm_perm_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
 declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_rot_epi8(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_rot_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vprotb %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_rot_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vprotb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
   %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %arg0, <16 x i8> %arg0, <16 x i8> %arg1)
@@ -395,10 +395,10 @@ define <2 x i64> @test_mm_rot_epi8(<2 x i64> %a0, <2 x i64> %a1) {
 declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_rot_epi16(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_rot_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vprotw %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_rot_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vprotw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %arg0, <8 x i16> %arg0, <8 x i16> %arg1)
@@ -408,10 +408,10 @@ define <2 x i64> @test_mm_rot_epi16(<2 x i64> %a0, <2 x i64> %a1) {
 declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_rot_epi32(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_rot_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vprotd %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_rot_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vprotd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %arg0, <4 x i32> %arg0, <4 x i32> %arg1)
@@ -421,20 +421,20 @@ define <2 x i64> @test_mm_rot_epi32(<2 x i64> %a0, <2 x i64> %a1) {
 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_rot_epi64(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_rot_epi64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vprotq %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_rot_epi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vprotq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> %a1)
   ret <2 x i64> %res
 }
 declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @test_mm_roti_epi8(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_roti_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vprotb $1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_roti_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vprotb $1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %arg0, <16 x i8> %arg0, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
   %bc = bitcast <16 x i8> %res to <2 x i64>
@@ -442,10 +442,10 @@ define <2 x i64> @test_mm_roti_epi8(<2 x i64> %a0) {
 }
 
 define <2 x i64> @test_mm_roti_epi16(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_roti_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vprotw $2, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_roti_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vprotw $2, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %arg0, <8 x i16> %arg0, <8 x i16> <i16 50, i16 50, i16 50, i16 50, i16 50, i16 50, i16 50, i16 50>)
   %bc = bitcast <8 x i16> %res to <2 x i64>
@@ -453,10 +453,10 @@ define <2 x i64> @test_mm_roti_epi16(<2 x i64> %a0) {
 }
 
 define <2 x i64> @test_mm_roti_epi32(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_roti_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vprotd $2, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_roti_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vprotd $2, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %arg0, <4 x i32> %arg0, <4 x i32> <i32 -30, i32 -30, i32 -30, i32 -30>)
   %bc = bitcast <4 x i32> %res to <2 x i64>
@@ -464,19 +464,19 @@ define <2 x i64> @test_mm_roti_epi32(<2 x i64> %a0) {
 }
 
 define <2 x i64> @test_mm_roti_epi64(<2 x i64> %a0) {
-; ALL-LABEL: test_mm_roti_epi64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vprotq $36, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_roti_epi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vprotq $36, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> <i64 100, i64 100>)
   ret <2 x i64> %res
 }
 
 define <2 x i64> @test_mm_shl_epi8(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_shl_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_shl_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
   %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %arg0, <16 x i8> %arg1)
@@ -486,10 +486,10 @@ define <2 x i64> @test_mm_shl_epi8(<2 x i64> %a0, <2 x i64> %a1) {
 declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_shl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_shl_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_shl_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %arg0, <8 x i16> %arg1)
@@ -499,10 +499,10 @@ define <2 x i64> @test_mm_shl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
 declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_shl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_shl_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpshld %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_shl_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshld %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %arg0, <4 x i32> %arg1)
@@ -512,20 +512,20 @@ define <2 x i64> @test_mm_shl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
 declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_shl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_shl_epi64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_shl_epi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
   ret <2 x i64> %res
 }
 declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @test_mm_sha_epi8(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_sha_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpshab %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_sha_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshab %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
   %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %arg0, <16 x i8> %arg1)
@@ -535,10 +535,10 @@ define <2 x i64> @test_mm_sha_epi8(<2 x i64> %a0, <2 x i64> %a1) {
 declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
 
 define <2 x i64> @test_mm_sha_epi16(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_sha_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_sha_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %arg0, <8 x i16> %arg1)
@@ -548,10 +548,10 @@ define <2 x i64> @test_mm_sha_epi16(<2 x i64> %a0, <2 x i64> %a1) {
 declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
 
 define <2 x i64> @test_mm_sha_epi32(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_sha_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpshad %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_sha_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshad %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %arg0, <4 x i32> %arg1)
@@ -561,20 +561,20 @@ define <2 x i64> @test_mm_sha_epi32(<2 x i64> %a0, <2 x i64> %a1) {
 declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <2 x i64> @test_mm_sha_epi64(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_sha_epi64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_sha_epi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
   ret <2 x i64> %res
 }
 declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_com_epu8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_com_epu8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
   %cmp = icmp ult <16 x i8> %arg0, %arg1
@@ -584,10 +584,10 @@ define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) {
 }
 
 define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_com_epu16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_com_epu16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %cmp = icmp ult <8 x i16> %arg0, %arg1
@@ -597,10 +597,10 @@ define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
 }
 
 define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_com_epu32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_com_epu32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %cmp = icmp ult <4 x i32> %arg0, %arg1
@@ -610,20 +610,20 @@ define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
 }
 
 define <2 x i64> @test_mm_com_epu64(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_com_epu64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_com_epu64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %cmp = icmp ult <2 x i64> %a0, %a1
   %res = sext <2 x i1> %cmp to <2 x i64>
   ret <2 x i64> %res
 }
 
 define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_com_epi8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_com_epi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
   %cmp = icmp slt <16 x i8> %arg0, %arg1
@@ -633,10 +633,10 @@ define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
 }
 
 define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_com_epi16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_com_epi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
   %cmp = icmp slt <8 x i16> %arg0, %arg1
@@ -646,10 +646,10 @@ define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) {
 }
 
 define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_com_epi32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_com_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
   %cmp = icmp slt <4 x i32> %arg0, %arg1
@@ -659,40 +659,40 @@ define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) {
 }
 
 define <2 x i64> @test_mm_com_epi64(<2 x i64> %a0, <2 x i64> %a1) {
-; ALL-LABEL: test_mm_com_epi64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_com_epi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %cmp = icmp slt <2 x i64> %a0, %a1
   %res = sext <2 x i1> %cmp to <2 x i64>
   ret <2 x i64> %res
 }
 
 define <2 x double> @test_mm_permute2_pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
-; ALL-LABEL: test_mm_permute2_pd:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_permute2_pd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0)
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
 
 define <4 x double> @test_mm256_permute2_pd(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
-; ALL-LABEL: test_mm256_permute2_pd:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm256_permute2_pd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0)
   ret <4 x double> %res
 }
 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
 
 define <4 x float> @test_mm_permute2_ps(<4 x float> %a0, <4 x float> %a1, <2 x i64> %a2) {
-; ALL-LABEL: test_mm_permute2_ps:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_permute2_ps:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
   %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %arg2, i8 0)
   ret <4 x float> %res
@@ -700,10 +700,10 @@ define <4 x float> @test_mm_permute2_ps(<4 x float> %a0, <4 x float> %a1, <2 x i
 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
 
 define <8 x float> @test_mm256_permute2_ps(<8 x float> %a0, <8 x float> %a1, <4 x i64> %a2) {
-; ALL-LABEL: test_mm256_permute2_ps:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm256_permute2_ps:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
   %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %arg2, i8 0)
   ret <8 x float> %res
@@ -711,60 +711,60 @@ define <8 x float> @test_mm256_permute2_ps(<8 x float> %a0, <8 x float> %a1, <4
 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
 
 define <4 x float> @test_mm_frcz_ss(<4 x float> %a0) {
-; ALL-LABEL: test_mm_frcz_ss:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vfrczss %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_frcz_ss:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfrczss %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0)
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
 
 define <2 x double> @test_mm_frcz_sd(<2 x double> %a0) {
-; ALL-LABEL: test_mm_frcz_sd:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vfrczsd %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_frcz_sd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfrczsd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0)
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
 
 define <4 x float> @test_mm_frcz_ps(<4 x float> %a0) {
-; ALL-LABEL: test_mm_frcz_ps:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vfrczps %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_frcz_ps:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfrczps %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
 
 define <2 x double> @test_mm_frcz_pd(<2 x double> %a0) {
-; ALL-LABEL: test_mm_frcz_pd:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vfrczpd %xmm0, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm_frcz_pd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfrczpd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0)
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
 
 define <8 x float> @test_mm256_frcz_ps(<8 x float> %a0) {
-; ALL-LABEL: test_mm256_frcz_ps:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vfrczps %ymm0, %ymm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm256_frcz_ps:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfrczps %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0)
   ret <8 x float> %res
 }
 declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
 
 define <4 x double> @test_mm256_frcz_pd(<4 x double> %a0) {
-; ALL-LABEL: test_mm256_frcz_pd:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vfrczpd %ymm0, %ymm0
-; ALL-NEXT:    ret{{[l|q]}}
+; CHECK-LABEL: test_mm256_frcz_pd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfrczpd %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0)
   ret <4 x double> %res
 }

diff  --git a/llvm/test/CodeGen/X86/xop-mask-comments.ll b/llvm/test/CodeGen/X86/xop-mask-comments.ll
index c8aa85c425a7..3e5bb351c5d1 100644
--- a/llvm/test/CodeGen/X86/xop-mask-comments.ll
+++ b/llvm/test/CodeGen/X86/xop-mask-comments.ll
@@ -1,87 +1,62 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,X64
 
 ;
 ; VPPERM
 ;
 
 define <16 x i8> @vpperm_shuffle_unary(<16 x i8> %a0) {
-; X32-LABEL: vpperm_shuffle_unary:
-; X32:       # %bb.0:
-; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpperm_shuffle_unary:
-; X64:       # %bb.0:
-; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpperm_shuffle_unary:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 19, i8 2, i8 17, i8 0>)
   ret <16 x i8> %1
 }
 
 define <16 x i8> @vpperm_shuffle_unary_undef(<16 x i8> %a0) {
-; X32-LABEL: vpperm_shuffle_unary_undef:
-; X32:       # %bb.0:
-; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpperm_shuffle_unary_undef:
-; X64:       # %bb.0:
-; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpperm_shuffle_unary_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> undef, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 19, i8 2, i8 17, i8 0>)
   ret <16 x i8> %1
 }
 
 define <16 x i8> @vpperm_shuffle_unary_zero(<16 x i8> %a0) {
-; X32-LABEL: vpperm_shuffle_unary_zero:
-; X32:       # %bb.0:
-; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3],zero,xmm0[1],zero
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpperm_shuffle_unary_zero:
-; X64:       # %bb.0:
-; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3],zero,xmm0[1],zero
-; X64-NEXT:    retq
+; CHECK-LABEL: vpperm_shuffle_unary_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3],zero,xmm0[1],zero
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 19, i8 130, i8 17, i8 128>)
   ret <16 x i8> %1
 }
 
 define <16 x i8> @vpperm_shuffle_binary(<16 x i8> %a0, <16 x i8> %a1) {
-; X32-LABEL: vpperm_shuffle_binary:
-; X32:       # %bb.0:
-; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],xmm1[3],xmm0[2],xmm1[1],xmm0[0]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpperm_shuffle_binary:
-; X64:       # %bb.0:
-; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],xmm1[3],xmm0[2],xmm1[1],xmm0[0]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpperm_shuffle_binary:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],xmm1[3],xmm0[2],xmm1[1],xmm0[0]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 19, i8 2, i8 17, i8 0>)
   ret <16 x i8> %1
 }
 
 define <16 x i8> @vpperm_shuffle_binary_zero(<16 x i8> %a0, <16 x i8> %a1) {
-; X32-LABEL: vpperm_shuffle_binary_zero:
-; X32:       # %bb.0:
-; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],zero,zero,zero,zero
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpperm_shuffle_binary_zero:
-; X64:       # %bb.0:
-; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],zero,zero,zero,zero
-; X64-NEXT:    retq
+; CHECK-LABEL: vpperm_shuffle_binary_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],zero,zero,zero,zero
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 147, i8 130, i8 145, i8 128>)
   ret <16 x i8> %1
 }
 
 ; we can't decode vpperm's other permute ops
 define <16 x i8> @vpperm_shuffle_general(<16 x i8> %a0, <16 x i8> %a1) {
-; X32-LABEL: vpperm_shuffle_general:
-; X32:       # %bb.0:
-; X32-NEXT:    vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: vpperm_shuffle_general:
+; X86:       # %bb.0:
+; X86-NEXT:    vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: vpperm_shuffle_general:
 ; X64:       # %bb.0:
@@ -98,87 +73,56 @@ define <16 x i8> @vpperm_shuffle_general(<16 x i8> %a0, <16 x i8> %a1) {
 ; Note: _mm_permute2_pd shouldn't be used for constant shuffles as there will always
 ; be a quicker (and smaller) alternative.
 define <2 x double> @vpermil2pd_21(<2 x double> %a0, <2 x double> %a1) {
-; X32-LABEL: vpermil2pd_21:
-; X32:       # %bb.0:
-; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpermil2pd_21:
-; X64:       # %bb.0:
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpermil2pd_21:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 10, i64 1>, i8 2)
   ret <2 x double> %1
 }
 
 define <4 x double> @vpermil2pd256_0062(<4 x double> %a0, <4 x double> %a1) {
-; X32-LABEL: vpermil2pd256_0062:
-; X32:       # %bb.0:
-; X32-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpermil2pd256_0062:
-; X64:       # %bb.0:
-; X64-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpermil2pd256_0062:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 0, i64 4, i64 0>, i8 0)
   ret <4 x double> %1
 }
 
 define <4 x double> @vpermil2pd256_zz73(<4 x double> %a0, <4 x double> %a1) {
-; X32-LABEL: vpermil2pd256_zz73:
-; X32:       # %bb.0:
-; X32-NEXT:    vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpermil2pd256_zz73:
-; X64:       # %bb.0:
-; X64-NEXT:    vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpermil2pd256_zz73:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 0, i64 14, i64 10>, i8 3)
   ret <4 x double> %1
 }
 
 define <4 x float> @vpermil2ps_0561(<4 x float> %a0, <4 x float> %a1) {
-; X32-LABEL: vpermil2ps_0561:
-; X32:       # %bb.0:
-; X32-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpermil2ps_0561:
-; X64:       # %bb.0:
-; X64-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpermil2ps_0561:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 1>, i8 0)
   ret <4 x float> %1
 }
 
 define <8 x float> @vpermil2ps256_098144FE(<8 x float> %a0, <8 x float> %a1) {
-; X32-LABEL: vpermil2ps256_098144FE:
-; X32:       # %bb.0:
-; X32-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpermil2ps256_098144FE:
-; X64:       # %bb.0:
-; X64-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpermil2ps256_098144FE:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 5, i32 4, i32 1, i32 0, i32 0, i32 7, i32 6>, i8 0)
   ret <8 x float> %1
 }
 
 define <8 x float> @vpermil2ps256_0zz8BzzA(<8 x float> %a0, <8 x float> %a1) {
-; X32-LABEL: vpermil2ps256_0zz8BzzA:
-; X32:       # %bb.0:
-; X32-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
-; X32-NEXT:    retl
-;
-; X64-LABEL: vpermil2ps256_0zz8BzzA:
-; X64:       # %bb.0:
-; X64-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
-; X64-NEXT:    retq
+; CHECK-LABEL: vpermil2ps256_0zz8BzzA:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
+; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 8, i32 4, i32 7, i32 8, i32 8, i32 6>, i8 2)
   ret <8 x float> %1
 }