[llvm] r321601 - [X86] Add a DAG combine to fix (v4i1 (bitcast (i4))) before type legalization sees the i4 and converts it to a load/store.

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Sun Dec 31 00:25:50 PST 2017


Author: ctopper
Date: Sun Dec 31 00:25:50 2017
New Revision: 321601

URL: http://llvm.org/viewvc/llvm-project?rev=321601&view=rev
Log:
[X86] Add a DAG combine to fix (v4i1 (bitcast (i4))) before type legalization sees the i4 and converts it to a load/store.

The same applies to i2 and v2i1.
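
For illustration only (this reproducer is hypothetical, not part of the
commit), the pattern being fixed looks like this in IR:

    define <4 x i1> @cast_i4(i4 %x) {
      %m = bitcast i4 %x to <4 x i1>
      ret <4 x i1> %m
    }

Previously, type legalization scalarized the illegal i4 and the bitcast was
lowered through a stack slot (store the byte, reload it, then kmov). The new
combine catches the bitcast while DCI.isBeforeLegalize() is still true and
widens it instead, as sketched after the code change below.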

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
    llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
    llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=321601&r1=321600&r2=321601&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Dec 31 00:25:50 2017
@@ -30412,9 +30412,22 @@ static SDValue combineBitcast(SDNode *N,
   // (i16 movmsk (16i8 sext (v16i1 x)))
   // before the setcc result is scalarized on subtargets that don't have legal
   // vxi1 types.
-  if (DCI.isBeforeLegalize())
+  if (DCI.isBeforeLegalize()) {
     if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
       return V;
+
+    // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
+    // type, widen both sides to avoid a trip through memory.
+    if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
+        Subtarget.hasVLX()) {
+      SDLoc dl(N);
+      N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
+      N0 = DAG.getBitcast(MVT::v8i1, N0);
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
+                         DAG.getIntPtrConstant(0, dl));
+    }
+  }
+
   // Since MMX types are special and don't usually play with other vector types,
   // it's better to handle them early to be sure we emit efficient code by
   // avoiding store-load conversions.
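
In DAG terms, the added combine rewrites the illegal-scalar bitcast before
type legalization can scalarize it (a sketch in the log message's notation;
the i2/v2i1 case widens through the same i8/v8i1 pair):

    (v4i1 (bitcast (i4 x)))
      --> (v4i1 (extract_subvector (v8i1 (bitcast (i8 (any_extend (i4 x))))), 0))

With VLX the v4i1/v2i1 and v8i1 mask types are all legal, so the mask can be
moved straight from a GPR with kmov instead of bouncing through a stack slot,
which is exactly what the test diffs below show.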

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll?rev=321601&r1=321600&r2=321601&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll Sun Dec 31 00:25:50 2017
@@ -871,23 +871,14 @@ define <2 x i64> @test_mm_broadcastd_epi
 define <2 x i64> @test_mm_mask_broadcastd_epi32(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
 ; X32-LABEL: test_mm_mask_broadcastd_epi32:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpbroadcastd %xmm1, %xmm0 {%k1}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_broadcastd_epi32:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpbroadcastd %xmm1, %xmm0 {%k1}
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -903,23 +894,14 @@ define <2 x i64> @test_mm_mask_broadcast
 define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_maskz_broadcastd_epi32:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_broadcastd_epi32:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z}
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i4
@@ -1007,23 +989,14 @@ define <2 x i64> @test_mm_broadcastq_epi
 define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
 ; X32-LABEL: test_mm_mask_broadcastq_epi64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $3, %al
-; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpbroadcastq %xmm1, %xmm0 {%k1}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_broadcastq_epi64:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $3, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpbroadcastq %xmm1, %xmm0 {%k1}
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i2
@@ -1036,23 +1009,14 @@ define <2 x i64> @test_mm_mask_broadcast
 define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_maskz_broadcastq_epi64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $3, %al
-; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_broadcastq_epi64:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $3, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z}
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i2
@@ -1079,23 +1043,14 @@ define <4 x i64> @test_mm256_broadcastq_
 define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) {
 ; X32-LABEL: test_mm256_mask_broadcastq_epi64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpbroadcastq %xmm1, %ymm0 {%k1}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_mask_broadcastq_epi64:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpbroadcastq %xmm1, %ymm0 {%k1}
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -1108,23 +1063,14 @@ define <4 x i64> @test_mm256_mask_broadc
 define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm256_maskz_broadcastq_epi64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_maskz_broadcastq_epi64:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z}
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i4
@@ -1151,23 +1097,14 @@ define <2 x double> @test_mm_broadcastsd
 define <2 x double> @test_mm_mask_broadcastsd_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) {
 ; X32-LABEL: test_mm_mask_broadcastsd_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $3, %al
-; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_broadcastsd_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $3, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i2
@@ -1180,23 +1117,14 @@ define <2 x double> @test_mm_mask_broadc
 define <2 x double> @test_mm_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
 ; X32-LABEL: test_mm_maskz_broadcastsd_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $3, %al
-; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_broadcastsd_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $3, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i2
@@ -1223,23 +1151,14 @@ define <4 x double> @test_mm256_broadcas
 define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %a0, i8 %a1, <2 x double> %a2) {
 ; X32-LABEL: test_mm256_mask_broadcastsd_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_mask_broadcastsd_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -1252,23 +1171,14 @@ define <4 x double> @test_mm256_mask_bro
 define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
 ; X32-LABEL: test_mm256_maskz_broadcastsd_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_maskz_broadcastsd_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i4
@@ -1295,23 +1205,14 @@ define <4 x float> @test_mm_broadcastss_
 define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
 ; X32-LABEL: test_mm_mask_broadcastss_ps:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vbroadcastss %xmm1, %xmm0 {%k1}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_broadcastss_ps:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vbroadcastss %xmm1, %xmm0 {%k1}
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -1324,23 +1225,14 @@ define <4 x float> @test_mm_mask_broadca
 define <4 x float> @test_mm_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) {
 ; X32-LABEL: test_mm_maskz_broadcastss_ps:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_broadcastss_ps:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i4
@@ -1419,23 +1311,14 @@ define <2 x double> @test_mm_movddup_pd(
 define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) {
 ; X32-LABEL: test_mm_mask_movddup_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $3, %al
-; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_movddup_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $3, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i2
@@ -1448,23 +1331,14 @@ define <2 x double> @test_mm_mask_movddu
 define <2 x double> @test_mm_maskz_movddup_pd(i8 %a0, <2 x double> %a1) {
 ; X32-LABEL: test_mm_maskz_movddup_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $3, %al
-; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_movddup_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $3, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a0 to i2
@@ -1491,23 +1365,14 @@ define <4 x double> @test_mm256_movddup_
 define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) {
 ; X32-LABEL: test_mm256_mask_movddup_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_mask_movddup_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -1520,23 +1385,14 @@ define <4 x double> @test_mm256_mask_mov
 define <4 x double> @test_mm256_maskz_movddup_pd(i8 %a0, <4 x double> %a1) {
 ; X32-LABEL: test_mm256_maskz_movddup_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_maskz_movddup_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a0 to i4
@@ -1563,23 +1419,14 @@ define <4 x float> @test_mm_movehdup_ps(
 define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
 ; X32-LABEL: test_mm_mask_movehdup_ps:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_movehdup_ps:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -1592,23 +1439,14 @@ define <4 x float> @test_mm_mask_movehdu
 define <4 x float> @test_mm_maskz_movehdup_ps(i8 %a0, <4 x float> %a1) {
 ; X32-LABEL: test_mm_maskz_movehdup_ps:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_movehdup_ps:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i4
@@ -1687,23 +1525,14 @@ define <4 x float> @test_mm_moveldup_ps(
 define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
 ; X32-LABEL: test_mm_mask_moveldup_ps:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_moveldup_ps:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -1716,23 +1545,14 @@ define <4 x float> @test_mm_mask_moveldu
 define <4 x float> @test_mm_maskz_moveldup_ps(i8 %a0, <4 x float> %a1) {
 ; X32-LABEL: test_mm_maskz_moveldup_ps:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_moveldup_ps:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i4
@@ -1811,23 +1631,14 @@ define <4 x i64> @test_mm256_permutex_ep
 define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64> %a2) {
 ; X32-LABEL: test_mm256_mask_permutex_epi64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_mask_permutex_epi64:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -1840,23 +1651,14 @@ define <4 x i64> @test_mm256_mask_permut
 define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 %a0, <4 x i64> %a1) {
 ; X32-LABEL: test_mm256_maskz_permutex_epi64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_maskz_permutex_epi64:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a0 to i4
@@ -1883,23 +1685,14 @@ define <4 x double> @test_mm256_permutex
 define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) {
 ; X32-LABEL: test_mm256_mask_permutex_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_mask_permutex_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -1912,23 +1705,14 @@ define <4 x double> @test_mm256_mask_per
 define <4 x double> @test_mm256_maskz_permutex_pd(i8 %a0, <4 x double> %a1) {
 ; X32-LABEL: test_mm256_maskz_permutex_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_maskz_permutex_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a0 to i4
@@ -1955,23 +1739,14 @@ define <2 x double> @test_mm_shuffle_pd(
 define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2, <2 x double> %a3) {
 ; X32-LABEL: test_mm_mask_shuffle_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $3, %al
-; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_shuffle_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $3, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i2
@@ -1984,23 +1759,14 @@ define <2 x double> @test_mm_mask_shuffl
 define <2 x double> @test_mm_maskz_shuffle_pd(i8 %a0, <2 x double> %a1, <2 x double> %a2) {
 ; X32-LABEL: test_mm_maskz_shuffle_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $3, %al
-; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_shuffle_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $3, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a0 to i2
@@ -2027,23 +1793,14 @@ define <4 x double> @test_mm256_shuffle_
 define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2, <4 x double> %a3) {
 ; X32-LABEL: test_mm256_mask_shuffle_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_mask_shuffle_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -2056,23 +1813,14 @@ define <4 x double> @test_mm256_mask_shu
 define <4 x double> @test_mm256_maskz_shuffle_pd(i8 %a0, <4 x double> %a1, <4 x double> %a2) {
 ; X32-LABEL: test_mm256_maskz_shuffle_pd:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_maskz_shuffle_pd:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a0 to i4
@@ -2099,23 +1847,14 @@ define <4 x float> @test_mm_shuffle_ps(<
 define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2, <4 x float> %a3) {
 ; X32-LABEL: test_mm_mask_shuffle_ps:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_shuffle_ps:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
 ; X64-NEXT:    retq
   %trn1 = trunc i8 %a1 to i4
@@ -2128,23 +1867,14 @@ define <4 x float> @test_mm_mask_shuffle
 define <4 x float> @test_mm_maskz_shuffle_ps(i8 %a0, <4 x float> %a1, <4 x float> %a2) {
 ; X32-LABEL: test_mm_maskz_shuffle_ps:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %eax
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movb %al, (%esp)
-; X32-NEXT:    movzbl (%esp), %eax
 ; X32-NEXT:    kmovw %eax, %k1
 ; X32-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
-; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_shuffle_ps:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %dil
-; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    kmovw %eax, %k1
+; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
 ; X64-NEXT:    retq
   %trn0 = trunc i8 %a0 to i4

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=321601&r1=321600&r2=321601&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Sun Dec 31 00:25:50 2017
@@ -37177,8 +37177,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -37236,8 +37235,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -37296,8 +37294,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -37517,8 +37514,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -37576,8 +37572,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -37636,8 +37631,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -37827,8 +37821,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
@@ -37875,8 +37868,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
@@ -37924,8 +37916,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
@@ -38122,8 +38113,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
@@ -38176,8 +38166,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
@@ -38231,8 +38220,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
@@ -40054,8 +40042,7 @@ entry:
 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovb %k0, -{{[0-9]+}}(%rsp)
 ; VLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
@@ -40089,8 +40076,7 @@ entry:
 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovb %k0, -{{[0-9]+}}(%rsp)
 ; VLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
@@ -40125,8 +40111,7 @@ entry:
 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovb %k0, -{{[0-9]+}}(%rsp)
 ; VLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
@@ -40280,8 +40265,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -40325,8 +40309,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -40371,8 +40354,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -40536,8 +40518,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -40581,8 +40562,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -40627,8 +40607,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -40804,8 +40783,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
@@ -40852,8 +40830,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
@@ -40901,8 +40878,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
@@ -41099,8 +41075,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
@@ -41153,8 +41128,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
@@ -41208,8 +41182,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
@@ -41433,8 +41406,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -41494,8 +41466,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -41556,8 +41527,7 @@ entry:
 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %al killed %al killed %eax
@@ -41788,8 +41758,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -41849,8 +41818,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -41911,8 +41879,7 @@ entry:
 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: def %ax killed %ax killed %eax
@@ -42110,8 +42077,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
@@ -42160,8 +42126,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
@@ -42211,8 +42176,7 @@ entry:
 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
@@ -42417,8 +42381,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
@@ -42473,8 +42436,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
@@ -42530,8 +42492,7 @@ entry:
 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
 ; VLX:       # %bb.0: # %entry
-; VLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; VLX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
+; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll?rev=321601&r1=321600&r2=321601&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll Sun Dec 31 00:25:50 2017
@@ -44,7 +44,6 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
 ;
 ; AVX512-LABEL: ext_i2_2i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    andb $3, %dil
 ; AVX512-NEXT:    kmovd %edi, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
@@ -84,7 +83,6 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
 ;
 ; AVX512-LABEL: ext_i4_4i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    andb $15, %dil
 ; AVX512-NEXT:    kmovd %edi, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
@@ -235,7 +233,6 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
 ;
 ; AVX512-LABEL: ext_i4_4i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    andb $15, %dil
 ; AVX512-NEXT:    kmovd %edi, %k1
 ; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}

Modified: llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll?rev=321601&r1=321600&r2=321601&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll Sun Dec 31 00:25:50 2017
@@ -57,7 +57,6 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
 ;
 ; AVX512VLBW-LABEL: ext_i2_2i64:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    andb $3, %dil
 ; AVX512VLBW-NEXT:    kmovd %edi, %k1
 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
 ; AVX512VLBW-NEXT:    retq
@@ -108,7 +107,6 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
 ;
 ; AVX512VLBW-LABEL: ext_i4_4i32:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    andb $15, %dil
 ; AVX512VLBW-NEXT:    kmovd %edi, %k1
 ; AVX512VLBW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
 ; AVX512VLBW-NEXT:    retq
@@ -299,7 +297,6 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
 ;
 ; AVX512VLBW-LABEL: ext_i4_4i64:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    andb $15, %dil
 ; AVX512VLBW-NEXT:    kmovd %edi, %k1
 ; AVX512VLBW-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
 ; AVX512VLBW-NEXT:    retq



