[llvm-branch-commits] [llvm-branch] r325112 - Revert r319778 (and r319911) due to PR36357

Hans Wennborg via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Feb 14 02:51:00 PST 2018


Author: hans
Date: Wed Feb 14 02:51:00 2018
New Revision: 325112

URL: http://llvm.org/viewvc/llvm-project?rev=325112&view=rev
Log:
Revert r319778 (and r319911) due to PR36357

Modified:
    llvm/branches/release_60/include/llvm/IR/IntrinsicsX86.td
    llvm/branches/release_60/lib/IR/AutoUpgrade.cpp
    llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp
    llvm/branches/release_60/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
    llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
    llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
    llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
    llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics.ll

Modified: llvm/branches/release_60/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/include/llvm/IR/IntrinsicsX86.td?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/branches/release_60/include/llvm/IR/IntrinsicsX86.td Wed Feb 14 02:51:00 2018
@@ -3738,6 +3738,15 @@ let TargetPrefix = "x86" in {  // All in
   def int_x86_avx512_kxnor_w : // TODO: remove this intrinsic
               Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
                          [IntrNoMem]>;
+  def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
+              Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
+                         [IntrNoMem]>;
+  def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">,
+              Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+                         [IntrNoMem]>;
+  def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">,
+              Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+                         [IntrNoMem]>;
   def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
               Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
                         [IntrNoMem]>;

Modified: llvm/branches/release_60/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/IR/AutoUpgrade.cpp?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/branches/release_60/lib/IR/AutoUpgrade.cpp Wed Feb 14 02:51:00 2018
@@ -75,7 +75,6 @@ static bool ShouldUpgradeX86Intrinsic(Fu
       Name=="ssse3.pabs.d.128" || // Added in 6.0
       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
-      Name.startswith("avx512.kunpck") || //added in 6.0 
       Name.startswith("avx2.pabs.") || // Added in 6.0
       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
       Name.startswith("avx512.broadcastm") || // Added in 6.0
@@ -1063,12 +1062,6 @@ void llvm::UpgradeIntrinsicCall(CallInst
       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                           CI->getArgOperand(1));
-    } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
-      uint64_t Shift = CI->getType()->getScalarSizeInBits() / 2;
-      uint64_t And = (1ULL << Shift) - 1; 
-      Value* LowBits =  Builder.CreateAnd(CI->getArgOperand(0), And);
-      Value* HighBits =  Builder.CreateShl(CI->getArgOperand(1), Shift);
-      Rep = Builder.CreateOr(LowBits, HighBits);
     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
       Type *I32Ty = Type::getInt32Ty(C);
       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),

Modified: llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp Wed Feb 14 02:51:00 2018
@@ -30466,53 +30466,6 @@ static SDValue combineBitcastvxi1(Select
   SDValue N0 = BitCast.getOperand(0);
   EVT VecVT = N0->getValueType(0);
 
-  if (VT.isVector() && VecVT.isScalarInteger() && Subtarget.hasAVX512() &&
-      N0->getOpcode() == ISD::OR) {
-    SDValue Op0 = N0->getOperand(0);
-    SDValue Op1 = N0->getOperand(1);
-    MVT TrunckVT;
-    MVT BitcastVT;
-    switch (VT.getSimpleVT().SimpleTy) {
-    default:
-      return SDValue();
-    case MVT::v16i1:
-      TrunckVT = MVT::i8;
-      BitcastVT = MVT::v8i1;
-      break;
-    case MVT::v32i1:
-      TrunckVT = MVT::i16;
-      BitcastVT = MVT::v16i1;
-      break;
-    case MVT::v64i1:
-      TrunckVT = MVT::i32;
-      BitcastVT = MVT::v32i1;
-      break;
-    }
-    bool isArg0UndefRight = Op0->getOpcode() == ISD::SHL;
-    bool isArg0UndefLeft =
-        Op0->getOpcode() == ISD::ZERO_EXTEND || Op0->getOpcode() == ISD::AND;
-    bool isArg1UndefRight = Op1->getOpcode() == ISD::SHL;
-    bool isArg1UndefLeft =
-        Op1->getOpcode() == ISD::ZERO_EXTEND || Op1->getOpcode() == ISD::AND;
-    SDValue OpLeft;
-    SDValue OpRight;
-    if (isArg0UndefRight && isArg1UndefLeft) {
-      OpLeft = Op0;
-      OpRight = Op1;
-    } else if (isArg1UndefRight && isArg0UndefLeft) {
-      OpLeft = Op1;
-      OpRight = Op0;
-    } else
-      return SDValue();
-    SDLoc DL(BitCast);
-    SDValue Shr = OpLeft->getOperand(0);
-    SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, Shr);
-    SDValue Bitcast1 = DAG.getBitcast(BitcastVT, Trunc1);
-    SDValue Trunc2 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, OpRight);
-    SDValue Bitcast2 = DAG.getBitcast(BitcastVT, Trunc2);
-    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Bitcast1, Bitcast2);
-  }
-
   if (!VT.isScalarInteger() || !VecVT.isSimple())
     return SDValue();
 

Modified: llvm/branches/release_60/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86IntrinsicsInfo.h?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/branches/release_60/lib/Target/X86/X86IntrinsicsInfo.h Wed Feb 14 02:51:00 2018
@@ -479,6 +479,9 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
   X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
   X86_INTRINSIC_DATA(avx512_kor_w, MASK_BINOP, ISD::OR, 0),
+  X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0),
+  X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
+  X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
   X86_INTRINSIC_DATA(avx512_kxor_w, MASK_BINOP, ISD::XOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
   X86ISD::FADD_RND),

Modified: llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Wed Feb 14 02:51:00 2018
@@ -5,59 +5,6 @@
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c
 
 
-define zeroext i16 @test_mm512_kunpackb(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) local_unnamed_addr #0 {
-; X32-LABEL: test_mm512_kunpackb:
-; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %ebp, -8
-; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    andl $-64, %esp
-; X32-NEXT:    subl $64, %esp
-; X32-NEXT:    vmovdqa64 136(%ebp), %zmm3
-; X32-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
-; X32-NEXT:    vpcmpneqd 8(%ebp), %zmm2, %k1
-; X32-NEXT:    kunpckbw %k0, %k1, %k1
-; X32-NEXT:    vpcmpneqd 72(%ebp), %zmm3, %k0 {%k1}
-; X32-NEXT:    kmovw %k0, %eax
-; X32-NEXT:    movzwl %ax, %eax
-; X32-NEXT:    movl %ebp, %esp
-; X32-NEXT:    popl %ebp
-; X32-NEXT:    vzeroupper
-; X32-NEXT:    retl
-;
-; X64-LABEL: test_mm512_kunpackb:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
-; X64-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
-; X64-NEXT:    kunpckbw %k0, %k1, %k1
-; X64-NEXT:    vpcmpneqd %zmm5, %zmm4, %k0 {%k1}
-; X64-NEXT:    kmovw %k0, %eax
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    vzeroupper
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <8 x i64> %__A to <16 x i32>
-  %1 = bitcast <8 x i64> %__B to <16 x i32>
-  %2 = icmp ne <16 x i32> %0, %1
-  %3 = bitcast <16 x i1> %2 to i16
-  %4 = bitcast <8 x i64> %__C to <16 x i32>
-  %5 = bitcast <8 x i64> %__D to <16 x i32>
-  %6 = icmp ne <16 x i32> %4, %5
-  %7 = bitcast <16 x i1> %6 to i16
-  %8 = and i16 %7, 255
-  %shl.i = shl i16 %3, 8
-  %or.i = or i16 %8, %shl.i
-  %9 = bitcast <8 x i64> %__E to <16 x i32>
-  %10 = bitcast <8 x i64> %__F to <16 x i32>
-  %11 = icmp ne <16 x i32> %9, %10
-  %12 = bitcast i16 %or.i to <16 x i1>
-  %13 = and <16 x i1> %11, %12
-  %14 = bitcast <16 x i1> %13 to i16
-  ret i16 %14
-}
-
 define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
 ; X32-LABEL: test_mm512_shuffle_f32x4:
 ; X32:       # %bb.0: # %entry

Modified: llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Wed Feb 14 02:51:00 2018
@@ -1,20 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
-declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
-
-define i16 @unpckbw_test(i16 %a0, i16 %a1) {
-; CHECK-LABEL: unpckbw_test:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    movzbl %dil, %eax
-; CHECK-NEXT:    shll $8, %esi
-; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    ## kill: def %ax killed %ax killed %eax
-; CHECK-NEXT:    retq
-  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
-  ret i16 %res
-}
-
 define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
 ; CHECK:       ## %bb.0:

Modified: llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics.ll?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/avx512-intrinsics.ll Wed Feb 14 02:51:00 2018
@@ -96,6 +96,21 @@ define i16 @test_kor(i16 %a0, i16 %a1) {
   ret i16 %t2
 }
 
+declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
+
+define i16 @unpckbw_test(i16 %a0, i16 %a1) {
+; CHECK-LABEL: unpckbw_test:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    kmovw %edi, %k0
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    kunpckbw %k1, %k0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    ## kill: def %ax killed %ax killed %eax
+; CHECK-NEXT:    retq
+  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
+  ret i16 %res
+}
+
 declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
 ; TODO: the two kxnor instructions here a no op and should be elimintaed,
 ; probably by FoldConstantArithmetic in SelectionDAG.

Modified: llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Wed Feb 14 02:51:00 2018
@@ -4,117 +4,6 @@
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c
 
-define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
-; X32-LABEL: test_mm512_kunpackd:
-; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %ebp, -8
-; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    andl $-64, %esp
-; X32-NEXT:    subl $64, %esp
-; X32-NEXT:    vmovdqa64 136(%ebp), %zmm3
-; X32-NEXT:    vmovdqa64 72(%ebp), %zmm4
-; X32-NEXT:    vmovdqa64 8(%ebp), %zmm5
-; X32-NEXT:    vpcmpneqb %zmm0, %zmm1, %k0
-; X32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
-; X32-NEXT:    vpcmpneqb %zmm5, %zmm2, %k0
-; X32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
-; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT:    kunpckdq %k0, %k1, %k1
-; X32-NEXT:    vpcmpneqb %zmm3, %zmm4, %k0 {%k1}
-; X32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl %ebp, %esp
-; X32-NEXT:    popl %ebp
-; X32-NEXT:    vzeroupper
-; X32-NEXT:    retl
-;
-; X64-LABEL: test_mm512_kunpackd:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    vpcmpneqb %zmm0, %zmm1, %k0
-; X64-NEXT:    vpcmpneqb %zmm3, %zmm2, %k1
-; X64-NEXT:    kunpckdq %k0, %k1, %k1
-; X64-NEXT:    vpcmpneqb %zmm5, %zmm4, %k0 {%k1}
-; X64-NEXT:    kmovq %k0, %rax
-; X64-NEXT:    vzeroupper
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <8 x i64> %__B to <64 x i8>
-  %1 = bitcast <8 x i64> %__A to <64 x i8>
-  %2 = icmp ne <64 x i8> %0, %1
-  %3 = bitcast <64 x i1> %2 to i64
-  %4 = bitcast <8 x i64> %__C to <64 x i8>
-  %5 = bitcast <8 x i64> %__D to <64 x i8>
-  %6 = icmp ne <64 x i8> %4, %5
-  %7 = bitcast <64 x i1> %6 to i64
-  %and.i = and i64 %7, 4294967295
-  %shl.i = shl i64 %3, 32
-  %or.i = or i64 %and.i, %shl.i
-  %8 = bitcast <8 x i64> %__E to <64 x i8>
-  %9 = bitcast <8 x i64> %__F to <64 x i8>
-  %10 = icmp ne <64 x i8> %8, %9
-  %11 = bitcast i64 %or.i to <64 x i1>
-  %12 = and <64 x i1> %10, %11
-  %13 = bitcast <64 x i1> %12 to i64
-  ret i64 %13
-}
-
-define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
-; X32-LABEL: test_mm512_kunpackw:
-; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %ebp, -8
-; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    andl $-64, %esp
-; X32-NEXT:    subl $64, %esp
-; X32-NEXT:    vmovdqa64 136(%ebp), %zmm3
-; X32-NEXT:    vpcmpneqw %zmm0, %zmm1, %k0
-; X32-NEXT:    vpcmpneqw 8(%ebp), %zmm2, %k1
-; X32-NEXT:    kunpckwd %k0, %k1, %k1
-; X32-NEXT:    vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1}
-; X32-NEXT:    kmovd %k0, %eax
-; X32-NEXT:    movl %ebp, %esp
-; X32-NEXT:    popl %ebp
-; X32-NEXT:    vzeroupper
-; X32-NEXT:    retl
-;
-; X64-LABEL: test_mm512_kunpackw:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    vpcmpneqw %zmm0, %zmm1, %k0
-; X64-NEXT:    vpcmpneqw %zmm3, %zmm2, %k1
-; X64-NEXT:    kunpckwd %k0, %k1, %k1
-; X64-NEXT:    vpcmpneqw %zmm5, %zmm4, %k0 {%k1}
-; X64-NEXT:    kmovd %k0, %eax
-; X64-NEXT:    vzeroupper
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <8 x i64> %__B to <32 x i16>
-  %1 = bitcast <8 x i64> %__A to <32 x i16>
-  %2 = icmp ne <32 x i16> %0, %1
-  %3 = bitcast <32 x i1> %2 to i32
-  %4 = bitcast <8 x i64> %__C to <32 x i16>
-  %5 = bitcast <8 x i64> %__D to <32 x i16>
-  %6 = icmp ne <32 x i16> %4, %5
-  %7 = bitcast <32 x i1> %6 to i32
-  %and.i = and i32 %7, 65535
-  %shl.i = shl i32 %3, 16
-  %or.i = or i32 %and.i, %shl.i
-  %8 = bitcast <8 x i64> %__E to <32 x i16>
-  %9 = bitcast <8 x i64> %__F to <32 x i16>
-  %10 = icmp ne <32 x i16> %8, %9
-  %11 = bitcast i32 %or.i to <32 x i1>
-  %12 = and <32 x i1> %10, %11
-  %13 = bitcast <32 x i1> %12 to i32
-  ret i32 %13
-}
-
-
 define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A)  {
 ; X32-LABEL: test_mm512_mask_set1_epi8:
 ; X32:       # %bb.0: # %entry
@@ -189,46 +78,19 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    movb %ch, %al
 ; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $55, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $9, %k0, %k1
 ; X32-NEXT:    andb $2, %al
 ; X32-NEXT:    shrb %al
 ; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $54, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $10, %k0, %k1
 ; X32-NEXT:    movb %ch, %al
 ; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %edx
 ; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $53, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $11, %k0, %k1
+; X32-NEXT:    kmovd %edx, %k3
 ; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $12, %eax
-; X32-NEXT:    andl $15, %eax
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    kmovd %eax, %k4
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    shrl $13, %eax
 ; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k3
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $14, %eax
-; X32-NEXT:    andl $3, %eax
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $15, %eax
-; X32-NEXT:    andl $1, %eax
 ; X32-NEXT:    kmovd %eax, %k5
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrl $16, %edx
@@ -243,25 +105,52 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
+; X32-NEXT:    kshiftrq $55, %k1, %k1
+; X32-NEXT:    kxorq %k0, %k1, %k0
+; X32-NEXT:    kshiftrq $9, %k0, %k1
+; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    kshiftlq $63, %k1, %k1
+; X32-NEXT:    kshiftrq $54, %k1, %k1
+; X32-NEXT:    kxorq %k0, %k1, %k0
+; X32-NEXT:    kshiftrq $10, %k0, %k1
+; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    kshiftlq $63, %k1, %k1
+; X32-NEXT:    kshiftrq $53, %k1, %k1
+; X32-NEXT:    kxorq %k0, %k1, %k0
+; X32-NEXT:    kshiftrq $11, %k0, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $52, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $12, %k0, %k1
+; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    shrl $12, %esi
+; X32-NEXT:    andl $15, %esi
+; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $51, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    kxorq %k5, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $50, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    shrl $14, %esi
+; X32-NEXT:    andl $3, %esi
+; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $49, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
+; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    shrl $15, %esi
+; X32-NEXT:    andl $1, %esi
+; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $48, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -494,22 +383,14 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $43, %k0, %k1
 ; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    andl $15, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    andl $3, %esi
-; X32-NEXT:    kmovd %esi, %k3
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $15, %esi
-; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    kmovd %esi, %k4
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $20, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $44, %k0, %k1
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    shrl $12, %esi
+; X32-NEXT:    andl $15, %esi
+; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $19, %k1, %k1
@@ -520,12 +401,20 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $18, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $46, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    shrl $14, %esi
+; X32-NEXT:    andl $3, %esi
+; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $17, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $47, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    shrl $15, %esi
+; X32-NEXT:    andl $1, %esi
+; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $16, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -551,8 +440,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $12, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k4
-; X32-NEXT:    kshiftrq $52, %k4, %k0
+; X32-NEXT:    kxorq %k0, %k1, %k3
+; X32-NEXT:    kshiftrq $52, %k3, %k0
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $4, %dl
 ; X32-NEXT:    kmovd %edx, %k1
@@ -576,19 +465,19 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    andb $15, %cl
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
+; X32-NEXT:    kmovd %edx, %k4
 ; X32-NEXT:    kshiftlq $63, %k5, %k5
 ; X32-NEXT:    kshiftrq $11, %k5, %k5
-; X32-NEXT:    kxorq %k4, %k5, %k4
-; X32-NEXT:    kshiftrq $53, %k4, %k5
+; X32-NEXT:    kxorq %k3, %k5, %k3
+; X32-NEXT:    kshiftrq $53, %k3, %k5
 ; X32-NEXT:    kxorq %k6, %k5, %k5
 ; X32-NEXT:    kshiftlq $63, %k5, %k5
 ; X32-NEXT:    kshiftrq $10, %k5, %k5
-; X32-NEXT:    kxorq %k4, %k5, %k5
-; X32-NEXT:    kshiftrq $54, %k5, %k4
-; X32-NEXT:    kxorq %k7, %k4, %k6
+; X32-NEXT:    kxorq %k3, %k5, %k5
+; X32-NEXT:    kshiftrq $54, %k5, %k3
+; X32-NEXT:    kxorq %k7, %k3, %k6
 ; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k4
+; X32-NEXT:    kmovd %ecx, %k3
 ; X32-NEXT:    movl %eax, %ecx
 ; X32-NEXT:    shrl $29, %ecx
 ; X32-NEXT:    andb $1, %cl
@@ -603,12 +492,6 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kxorq %k5, %k0, %k0
 ; X32-NEXT:    kshiftrq $56, %k0, %k5
 ; X32-NEXT:    kxorq %k1, %k5, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $28, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $30, %ecx
-; X32-NEXT:    kmovd %ecx, %k6
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $7, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -618,17 +501,20 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $6, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $58, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $5, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $59, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    kxorq %k3, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $4, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $60, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrl $28, %ecx
+; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $3, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -638,7 +524,10 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $2, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $62, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrl $30, %ecx
+; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    shrl $31, %eax
 ; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
@@ -743,46 +632,19 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    movb %ch, %al
 ; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $55, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $9, %k0, %k1
 ; X32-NEXT:    andb $2, %al
 ; X32-NEXT:    shrb %al
 ; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $54, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $10, %k0, %k1
 ; X32-NEXT:    movb %ch, %al
 ; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %edx
 ; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $53, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $11, %k0, %k1
+; X32-NEXT:    kmovd %edx, %k3
 ; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $12, %eax
-; X32-NEXT:    andl $15, %eax
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    kmovd %eax, %k4
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    shrl $13, %eax
 ; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k3
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $14, %eax
-; X32-NEXT:    andl $3, %eax
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $15, %eax
-; X32-NEXT:    andl $1, %eax
 ; X32-NEXT:    kmovd %eax, %k5
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrl $16, %edx
@@ -797,25 +659,52 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
+; X32-NEXT:    kshiftrq $55, %k1, %k1
+; X32-NEXT:    kxorq %k0, %k1, %k0
+; X32-NEXT:    kshiftrq $9, %k0, %k1
+; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    kshiftlq $63, %k1, %k1
+; X32-NEXT:    kshiftrq $54, %k1, %k1
+; X32-NEXT:    kxorq %k0, %k1, %k0
+; X32-NEXT:    kshiftrq $10, %k0, %k1
+; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    kshiftlq $63, %k1, %k1
+; X32-NEXT:    kshiftrq $53, %k1, %k1
+; X32-NEXT:    kxorq %k0, %k1, %k0
+; X32-NEXT:    kshiftrq $11, %k0, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $52, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $12, %k0, %k1
+; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    shrl $12, %esi
+; X32-NEXT:    andl $15, %esi
+; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $51, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    kxorq %k5, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $50, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    shrl $14, %esi
+; X32-NEXT:    andl $3, %esi
+; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $49, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
+; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    shrl $15, %esi
+; X32-NEXT:    andl $1, %esi
+; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $48, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -1048,22 +937,14 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $43, %k0, %k1
 ; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    andl $15, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    andl $3, %esi
-; X32-NEXT:    kmovd %esi, %k3
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $15, %esi
-; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    kmovd %esi, %k4
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $20, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $44, %k0, %k1
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    shrl $12, %esi
+; X32-NEXT:    andl $15, %esi
+; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $19, %k1, %k1
@@ -1074,12 +955,20 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $18, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $46, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    shrl $14, %esi
+; X32-NEXT:    andl $3, %esi
+; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $17, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $47, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    shrl $15, %esi
+; X32-NEXT:    andl $1, %esi
+; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $16, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -1105,8 +994,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $12, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k4
-; X32-NEXT:    kshiftrq $52, %k4, %k0
+; X32-NEXT:    kxorq %k0, %k1, %k3
+; X32-NEXT:    kshiftrq $52, %k3, %k0
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $4, %dl
 ; X32-NEXT:    kmovd %edx, %k1
@@ -1130,19 +1019,19 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    andb $15, %cl
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
+; X32-NEXT:    kmovd %edx, %k4
 ; X32-NEXT:    kshiftlq $63, %k5, %k5
 ; X32-NEXT:    kshiftrq $11, %k5, %k5
-; X32-NEXT:    kxorq %k4, %k5, %k4
-; X32-NEXT:    kshiftrq $53, %k4, %k5
+; X32-NEXT:    kxorq %k3, %k5, %k3
+; X32-NEXT:    kshiftrq $53, %k3, %k5
 ; X32-NEXT:    kxorq %k6, %k5, %k5
 ; X32-NEXT:    kshiftlq $63, %k5, %k5
 ; X32-NEXT:    kshiftrq $10, %k5, %k5
-; X32-NEXT:    kxorq %k4, %k5, %k5
-; X32-NEXT:    kshiftrq $54, %k5, %k4
-; X32-NEXT:    kxorq %k7, %k4, %k6
+; X32-NEXT:    kxorq %k3, %k5, %k5
+; X32-NEXT:    kshiftrq $54, %k5, %k3
+; X32-NEXT:    kxorq %k7, %k3, %k6
 ; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k4
+; X32-NEXT:    kmovd %ecx, %k3
 ; X32-NEXT:    movl %eax, %ecx
 ; X32-NEXT:    shrl $29, %ecx
 ; X32-NEXT:    andb $1, %cl
@@ -1157,12 +1046,6 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kxorq %k5, %k0, %k0
 ; X32-NEXT:    kshiftrq $56, %k0, %k5
 ; X32-NEXT:    kxorq %k1, %k5, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $28, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $30, %ecx
-; X32-NEXT:    kmovd %ecx, %k6
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $7, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -1172,17 +1055,20 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $6, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $58, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $5, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $59, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    kxorq %k3, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $4, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $60, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrl $28, %ecx
+; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $3, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -1192,7 +1078,10 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $2, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $62, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrl $30, %ecx
+; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    shrl $31, %eax
 ; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al

Modified: llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Wed Feb 14 02:51:00 2018
@@ -2,46 +2,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
 
-declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
-
-define i32 at test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
-; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
-; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    movzwl %di, %eax
-; AVX512BW-NEXT:    shll $16, %esi
-; AVX512BW-NEXT:    orl %esi, %eax
-; AVX512BW-NEXT:    retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
-; AVX512F-32:       # %bb.0:
-; AVX512F-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    shll $16, %eax
-; AVX512F-32-NEXT:    orl %ecx, %eax
-; AVX512F-32-NEXT:    retl
-  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
-  ret i32 %res
-}
-
-declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
-
-define i64 at test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
-; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
-; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    movl %edi, %eax
-; AVX512BW-NEXT:    shlq $32, %rsi
-; AVX512BW-NEXT:    orq %rsi, %rax
-; AVX512BW-NEXT:    retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
-; AVX512F-32:       # %bb.0:
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT:    retl
-  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
-  ret i64 %res
-}
-
 declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
 
   define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {

Modified: llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=325112&r1=325111&r2=325112&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/avx512bw-intrinsics.ll Wed Feb 14 02:51:00 2018
@@ -1455,6 +1455,55 @@ define  <8 x i64>@test_int_x86_avx512_ma
   ret  <8 x i64> %res2
 }
 
+declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
+
+define i32 at test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    kmovd %edi, %k0
+; AVX512BW-NEXT:    kmovd %esi, %k1
+; AVX512BW-NEXT:    kunpckwd %k1, %k0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
+; AVX512F-32:       # %bb.0:
+; AVX512F-32-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX512F-32-NEXT:    kmovd %k0, %eax
+; AVX512F-32-NEXT:    retl
+  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
+
+define i64 at test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    kmovq %rdi, %k0
+; AVX512BW-NEXT:    kmovq %rsi, %k1
+; AVX512BW-NEXT:    kunpckdq %k1, %k0, %k0
+; AVX512BW-NEXT:    kmovq %k0, %rax
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512F-32:       # %bb.0:
+; AVX512F-32-NEXT:    subl $12, %esp
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kmovq %k0, (%esp)
+; AVX512F-32-NEXT:    movl (%esp), %eax
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    addl $12, %esp
+; AVX512F-32-NEXT:    retl
+  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
+  ret i64 %res
+}
+
 declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
 
 define i64 at test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {




More information about the llvm-branch-commits mailing list