[llvm] r319778 - [x86][AVX512] Lowering kunpack intrinsics to LLVM IR

Bill Seurer via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 6 11:35:26 PST 2017


Thanks, looks like it is working now.

On 12/06/2017 11:56 AM, Craig Topper wrote:
> I've attempted a fix for at least the ubsan failure in r319911
> 
> ~Craig
> 
> On Wed, Dec 6, 2017 at 9:49 AM, Bill Seurer via llvm-commits 
> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
> 
>     This is still causing failures on all the powerpc64 buildbots:
> 
>     ******************** TEST 'LLVM ::
>     CodeGen/X86/avx512bw-intrinsics-upgrade.ll' FAILED ********************
>     Script:
>     --
>     /home/buildbots/ppc64be-clang-test/clang-ppc64be/stage1/bin/llc <
>     /home/buildbots/ppc64be-clang-test/clang-ppc64be/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>     -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw |
>     /home/buildbots/ppc64be-clang-test/clang-ppc64be/stage1/bin/FileCheck /home/buildbots/ppc64be-clang-test/clang-ppc64be/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>     --check-prefix=ALL --check-prefix=AVX512BW
>     /home/buildbots/ppc64be-clang-test/clang-ppc64be/stage1/bin/llc <
>     /home/buildbots/ppc64be-clang-test/clang-ppc64be/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>     -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512bw |
>     /home/buildbots/ppc64be-clang-test/clang-ppc64be/stage1/bin/FileCheck /home/buildbots/ppc64be-clang-test/clang-ppc64be/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>     --check-prefix=ALL --check-prefix=AVX512F-32
>     --
>     Exit Code: 1
> 
>     Command Output (stderr):
>     --
>     /home/buildbots/ppc64be-clang-test/clang-ppc64be/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll:32:18:
>     error: AVX512BW-NEXT: is not on the line after the previous match
>     ; AVX512BW-NEXT: movq %rsi, %rax
>                       ^
>     <stdin>:20:2: note: 'next' match was here
>       movq %rsi, %rax
>       ^
>     <stdin>:18:16: note: previous match ended here
>       shlq $32, %rsi
>                     ^
>     <stdin>:19:1: note: non-matching line after previous match is here
>       orq %rdi, %rsi
>     ^
> 
>     --
> 
>     ********************
> 
> 
>     On 12/05/2017 09:42 AM, Jina Nahias via llvm-commits wrote:
> 
>         Author: jina.nahias
>         Date: Tue Dec  5 07:42:56 2017
>         New Revision: 319778
> 
>         URL: http://llvm.org/viewvc/llvm-project?rev=319778&view=rev
>         <http://llvm.org/viewvc/llvm-project?rev=319778&view=rev>
>         Log:
>         [x86][AVX512] Lowering kunpack intrinsics to LLVM IR
> 
>         This patch, together with a matching clang patch
>         (https://reviews.llvm.org/D39719
>         <https://reviews.llvm.org/D39719>), implements the lowering of
>         X86 kunpack intrinsics to IR.
> 
>         Differential Revision: https://reviews.llvm.org/D39720
>         <https://reviews.llvm.org/D39720>
> 
>         Change-Id: I4088d9428478f9457f6afddc90bd3d66b3daf0a1
> 
>         Modified:
>               llvm/trunk/include/llvm/IR/IntrinsicsX86.td
>               llvm/trunk/lib/IR/AutoUpgrade.cpp
>               llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>               llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>               llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
>               llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
>               llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
>               llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
>               llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>               llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
> 
>         Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
>         +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Dec  5
>         07:42:56 2017
>         @@ -3738,15 +3738,6 @@ let TargetPrefix = "x86" in {  // All in
>              def int_x86_avx512_kxnor_w :
>         GCCBuiltin<"__builtin_ia32_kxnorhi">,
>                          Intrinsic<[llvm_i16_ty], [llvm_i16_ty,
>         llvm_i16_ty],
>                                     [IntrNoMem]>;
>         -  def int_x86_avx512_kunpck_bw :
>         GCCBuiltin<"__builtin_ia32_kunpckhi">,
>         -              Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
>         -                         [IntrNoMem]>;
>         -  def int_x86_avx512_kunpck_wd :
>         GCCBuiltin<"__builtin_ia32_kunpcksi">,
>         -              Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
>         -                         [IntrNoMem]>;
>         -  def int_x86_avx512_kunpck_dq :
>         GCCBuiltin<"__builtin_ia32_kunpckdi">,
>         -              Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
>         -                         [IntrNoMem]>;
>              def int_x86_avx512_kortestz_w :
>         GCCBuiltin<"__builtin_ia32_kortestzhi">,
>                          Intrinsic<[llvm_i32_ty], [llvm_i16_ty,
>         llvm_i16_ty],
>                                    [IntrNoMem]>;
> 
>         Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
>         +++ llvm/trunk/lib/IR/AutoUpgrade.cpp Tue Dec  5 07:42:56 2017
>         @@ -78,6 +78,7 @@ static bool ShouldUpgradeX86Intrinsic(Fu
>                  Name=="ssse3.pabs.d.128" || // Added in 6.0
>                  Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
>                  Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
>         +      Name.startswith("avx512.kunpck") || //added in 6.0
>                  Name.startswith("avx2.pabs.") || // Added in 6.0
>                  Name.startswith("avx512.mask.pabs.") || // Added in 6.0
>                  Name.startswith("avx512.broadcastm") || // Added in 6.0
>         @@ -1065,6 +1066,12 @@ void llvm::UpgradeIntrinsicCall(CallInst
>                  Rep = Builder.CreateVectorSplat(NumElts,
>         CI->getArgOperand(0));
>                  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
>                                      CI->getArgOperand(1));
>         +    } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
>         +      uint64_t Shift = CI->getType()->getScalarSizeInBits() / 2;
>         +      uint64_t And = (1 << Shift) - 1;
>         +      Value* LowBits =  Builder.CreateAnd(CI->getArgOperand(0),
>         And);
>         +      Value* HighBits = 
>         Builder.CreateShl(CI->getArgOperand(1), Shift);
>         +      Rep = Builder.CreateOr(LowBits, HighBits);
>                } else if (IsX86 && (Name == "sse.add.ss" || Name ==
>         "sse2.add.sd")) {
>                  Type *I32Ty = Type::getInt32Ty(C);
>                  Value *Elt0 =
>         Builder.CreateExtractElement(CI->getArgOperand(0),
> 
>         Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>         +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec  5
>         07:42:56 2017
>         @@ -30007,6 +30007,53 @@ static SDValue combineBitcastvxi1(Select
>              SDValue N0 = BitCast.getOperand(0);
>              EVT VecVT = N0->getValueType(0);
>            +  if (VT.isVector() && VecVT.isScalarInteger() &&
>         Subtarget.hasAVX512() &&
>         +      N0->getOpcode() == ISD::OR) {
>         +    SDValue Op0 = N0->getOperand(0);
>         +    SDValue Op1 = N0->getOperand(1);
>         +    MVT TrunckVT;
>         +    MVT BitcastVT;
>         +    switch (VT.getSimpleVT().SimpleTy) {
>         +    default:
>         +      return SDValue();
>         +    case MVT::v16i1:
>         +      TrunckVT = MVT::i8;
>         +      BitcastVT = MVT::v8i1;
>         +      break;
>         +    case MVT::v32i1:
>         +      TrunckVT = MVT::i16;
>         +      BitcastVT = MVT::v16i1;
>         +      break;
>         +    case MVT::v64i1:
>         +      TrunckVT = MVT::i32;
>         +      BitcastVT = MVT::v32i1;
>         +      break;
>         +    }
>         +    bool isArg0UndefRight = Op0->getOpcode() == ISD::SHL;
>         +    bool isArg0UndefLeft =
>         +        Op0->getOpcode() == ISD::ZERO_EXTEND ||
>         Op0->getOpcode() == ISD::AND;
>         +    bool isArg1UndefRight = Op1->getOpcode() == ISD::SHL;
>         +    bool isArg1UndefLeft =
>         +        Op1->getOpcode() == ISD::ZERO_EXTEND ||
>         Op1->getOpcode() == ISD::AND;
>         +    SDValue OpLeft;
>         +    SDValue OpRight;
>         +    if (isArg0UndefRight && isArg1UndefLeft) {
>         +      OpLeft = Op0;
>         +      OpRight = Op1;
>         +    } else if (isArg1UndefRight && isArg0UndefLeft) {
>         +      OpLeft = Op1;
>         +      OpRight = Op0;
>         +    } else
>         +      return SDValue();
>         +    SDLoc DL(BitCast);
>         +    SDValue Shr = OpLeft->getOperand(0);
>         +    SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, Shr);
>         +    SDValue Bitcast1 = DAG.getBitcast(BitcastVT, Trunc1);
>         +    SDValue Trunc2 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT,
>         OpRight);
>         +    SDValue Bitcast2 = DAG.getBitcast(BitcastVT, Trunc2);
>         +    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Bitcast1,
>         Bitcast2);
>         +  }
>         +
>              if (!VT.isScalarInteger() || !VecVT.isSimple())
>                return SDValue();
> 
>         Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
>         +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Dec  5
>         07:42:56 2017
>         @@ -479,9 +479,6 @@ static const IntrinsicData  IntrinsicsWi
>              X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,
>         X86ISD::EXP2, 0),
>              X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
>              X86_INTRINSIC_DATA(avx512_kor_w, MASK_BINOP, ISD::OR, 0),
>         -  X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK,
>         ISD::CONCAT_VECTORS, 0),
>         -  X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK,
>         ISD::CONCAT_VECTORS, 0),
>         -  X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK,
>         ISD::CONCAT_VECTORS, 0),
>              X86_INTRINSIC_DATA(avx512_kxor_w, MASK_BINOP, ISD::XOR, 0),
>              X86_INTRINSIC_DATA(avx512_mask_add_pd_512,
>         INTR_TYPE_2OP_MASK, ISD::FADD,
>              X86ISD::FADD_RND),
> 
>         Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
>         (original)
>         +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
>         Tue Dec  5 07:42:56 2017
>         @@ -5,6 +5,59 @@
>            ; NOTE: This should use IR equivalent to what is generated by
>         clang/test/CodeGen/avx512f-builtins.c
>              +define zeroext i16 @test_mm512_kunpackb(<8 x i64> %__A, <8
>         x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8
>         x i64> %__F) local_unnamed_addr #0 {
>         +; X32-LABEL: test_mm512_kunpackb:
>         +; X32:       # %bb.0: # %entry
>         +; X32-NEXT:    pushl %ebp
>         +; X32-NEXT:    .cfi_def_cfa_offset 8
>         +; X32-NEXT:    .cfi_offset %ebp, -8
>         +; X32-NEXT:    movl %esp, %ebp
>         +; X32-NEXT:    .cfi_def_cfa_register %ebp
>         +; X32-NEXT:    andl $-64, %esp
>         +; X32-NEXT:    subl $64, %esp
>         +; X32-NEXT:    vmovdqa64 136(%ebp), %zmm3
>         +; X32-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
>         +; X32-NEXT:    vpcmpneqd 8(%ebp), %zmm2, %k1
>         +; X32-NEXT:    kunpckbw %k0, %k1, %k1
>         +; X32-NEXT:    vpcmpneqd 72(%ebp), %zmm3, %k0 {%k1}
>         +; X32-NEXT:    kmovw %k0, %eax
>         +; X32-NEXT:    movzwl %ax, %eax
>         +; X32-NEXT:    movl %ebp, %esp
>         +; X32-NEXT:    popl %ebp
>         +; X32-NEXT:    vzeroupper
>         +; X32-NEXT:    retl
>         +;
>         +; X64-LABEL: test_mm512_kunpackb:
>         +; X64:       # %bb.0: # %entry
>         +; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
>         +; X64-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
>         +; X64-NEXT:    kunpckbw %k0, %k1, %k1
>         +; X64-NEXT:    vpcmpneqd %zmm5, %zmm4, %k0 {%k1}
>         +; X64-NEXT:    kmovw %k0, %eax
>         +; X64-NEXT:    movzwl %ax, %eax
>         +; X64-NEXT:    vzeroupper
>         +; X64-NEXT:    retq
>         +entry:
>         +  %0 = bitcast <8 x i64> %__A to <16 x i32>
>         +  %1 = bitcast <8 x i64> %__B to <16 x i32>
>         +  %2 = icmp ne <16 x i32> %0, %1
>         +  %3 = bitcast <16 x i1> %2 to i16
>         +  %4 = bitcast <8 x i64> %__C to <16 x i32>
>         +  %5 = bitcast <8 x i64> %__D to <16 x i32>
>         +  %6 = icmp ne <16 x i32> %4, %5
>         +  %7 = bitcast <16 x i1> %6 to i16
>         +  %8 = and i16 %7, 255
>         +  %shl.i = shl i16 %3, 8
>         +  %or.i = or i16 %8, %shl.i
>         +  %9 = bitcast <8 x i64> %__E to <16 x i32>
>         +  %10 = bitcast <8 x i64> %__F to <16 x i32>
>         +  %11 = icmp ne <16 x i32> %9, %10
>         +  %12 = bitcast i16 %or.i to <16 x i1>
>         +  %13 = and <16 x i1> %11, %12
>         +  %14 = bitcast <16 x i1> %13 to i16
>         +  ret i16 %14
>         +}
>         +
>            define <16 x float> @test_mm512_shuffle_f32x4(<16 x float>
>         %__A, <16 x float> %__B) {
>            ; X32-LABEL: test_mm512_shuffle_f32x4:
>            ; X32:       # %bb.0: # %entry
> 
>         Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
>         (original)
>         +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Tue
>         Dec  5 07:42:56 2017
>         @@ -1,7 +1,21 @@
>            ; NOTE: Assertions have been autogenerated by
>         utils/update_llc_test_checks.py
>            ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl |
>         FileCheck %s
>            - define <16 x
>         i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x
>         i32> %x1, i16 %mask) {
>         +declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
>         +
>         +define i16 @unpckbw_test(i16 %a0, i16 %a1) {
>         +; CHECK-LABEL: unpckbw_test:
>         +; CHECK:       ## %bb.0:
>         +; CHECK-NEXT:    movzbl %dil, %eax
>         +; CHECK-NEXT:    shll $8, %esi
>         +; CHECK-NEXT:    orl %esi, %eax
>         +; CHECK-NEXT:    ## kill: %ax<def> %ax<kill> %eax<kill>
>         +; CHECK-NEXT:    retq
>         +  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
>         +  ret i16 %res
>         +}
>         +
>         +define <16 x
>         i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x
>         i32> %x1, i16 %mask) {
>            ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
>            ; CHECK:       ## %bb.0:
>            ; CHECK-NEXT:    vpbroadcastd %edi, %zmm1
> 
>         Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
>         +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Tue Dec  5
>         07:42:56 2017
>         @@ -96,21 +96,6 @@ define i16 @test_kor(i16 %a0, i16 %a1) {
>              ret i16 %t2
>            }
>            -declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
>         -
>         -define i16 @unpckbw_test(i16 %a0, i16 %a1) {
>         -; CHECK-LABEL: unpckbw_test:
>         -; CHECK:       ## %bb.0:
>         -; CHECK-NEXT:    kmovw %edi, %k0
>         -; CHECK-NEXT:    kmovw %esi, %k1
>         -; CHECK-NEXT:    kunpckbw %k1, %k0, %k0
>         -; CHECK-NEXT:    kmovw %k0, %eax
>         -; CHECK-NEXT:    ## kill: %ax<def> %ax<kill> %eax<kill>
>         -; CHECK-NEXT:    retq
>         -  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
>         -  ret i16 %res
>         -}
>         -
>            declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
>            ; TODO: the two kxnor instructions here a no op and should be
>         elimintaed,
>            ; probably by FoldConstantArithmetic in SelectionDAG.
> 
>         Modified:
>         llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
>         (original)
>         +++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
>         Tue Dec  5 07:42:56 2017
>         @@ -4,6 +4,117 @@
>              ; NOTE: This should use IR equivalent to what is generated
>         by clang/test/CodeGen/avx512bw-builtins.c
>            +define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64>
>         %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64>
>         %__F) {
>         +; X32-LABEL: test_mm512_kunpackd:
>         +; X32:       # %bb.0: # %entry
>         +; X32-NEXT:    pushl %ebp
>         +; X32-NEXT:    .cfi_def_cfa_offset 8
>         +; X32-NEXT:    .cfi_offset %ebp, -8
>         +; X32-NEXT:    movl %esp, %ebp
>         +; X32-NEXT:    .cfi_def_cfa_register %ebp
>         +; X32-NEXT:    andl $-64, %esp
>         +; X32-NEXT:    subl $64, %esp
>         +; X32-NEXT:    vmovdqa64 136(%ebp), %zmm3
>         +; X32-NEXT:    vmovdqa64 72(%ebp), %zmm4
>         +; X32-NEXT:    vmovdqa64 8(%ebp), %zmm5
>         +; X32-NEXT:    vpcmpneqb %zmm0, %zmm1, %k0
>         +; X32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
>         +; X32-NEXT:    vpcmpneqb %zmm5, %zmm2, %k0
>         +; X32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
>         +; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
>         +; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
>         +; X32-NEXT:    kunpckdq %k0, %k1, %k1
>         +; X32-NEXT:    vpcmpneqb %zmm3, %zmm4, %k0 {%k1}
>         +; X32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
>         +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
>         +; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
>         +; X32-NEXT:    movl %ebp, %esp
>         +; X32-NEXT:    popl %ebp
>         +; X32-NEXT:    vzeroupper
>         +; X32-NEXT:    retl
>         +;
>         +; X64-LABEL: test_mm512_kunpackd:
>         +; X64:       # %bb.0: # %entry
>         +; X64-NEXT:    vpcmpneqb %zmm0, %zmm1, %k0
>         +; X64-NEXT:    vpcmpneqb %zmm3, %zmm2, %k1
>         +; X64-NEXT:    kunpckdq %k0, %k1, %k1
>         +; X64-NEXT:    vpcmpneqb %zmm5, %zmm4, %k0 {%k1}
>         +; X64-NEXT:    kmovq %k0, %rax
>         +; X64-NEXT:    vzeroupper
>         +; X64-NEXT:    retq
>         +entry:
>         +  %0 = bitcast <8 x i64> %__B to <64 x i8>
>         +  %1 = bitcast <8 x i64> %__A to <64 x i8>
>         +  %2 = icmp ne <64 x i8> %0, %1
>         +  %3 = bitcast <64 x i1> %2 to i64
>         +  %4 = bitcast <8 x i64> %__C to <64 x i8>
>         +  %5 = bitcast <8 x i64> %__D to <64 x i8>
>         +  %6 = icmp ne <64 x i8> %4, %5
>         +  %7 = bitcast <64 x i1> %6 to i64
>         +  %and.i = and i64 %7, 4294967295
>         +  %shl.i = shl i64 %3, 32
>         +  %or.i = or i64 %and.i, %shl.i
>         +  %8 = bitcast <8 x i64> %__E to <64 x i8>
>         +  %9 = bitcast <8 x i64> %__F to <64 x i8>
>         +  %10 = icmp ne <64 x i8> %8, %9
>         +  %11 = bitcast i64 %or.i to <64 x i1>
>         +  %12 = and <64 x i1> %10, %11
>         +  %13 = bitcast <64 x i1> %12 to i64
>         +  ret i64 %13
>         +}
>         +
>         +define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B,
>         <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
>         +; X32-LABEL: test_mm512_kunpackw:
>         +; X32:       # %bb.0: # %entry
>         +; X32-NEXT:    pushl %ebp
>         +; X32-NEXT:    .cfi_def_cfa_offset 8
>         +; X32-NEXT:    .cfi_offset %ebp, -8
>         +; X32-NEXT:    movl %esp, %ebp
>         +; X32-NEXT:    .cfi_def_cfa_register %ebp
>         +; X32-NEXT:    andl $-64, %esp
>         +; X32-NEXT:    subl $64, %esp
>         +; X32-NEXT:    vmovdqa64 136(%ebp), %zmm3
>         +; X32-NEXT:    vpcmpneqw %zmm0, %zmm1, %k0
>         +; X32-NEXT:    vpcmpneqw 8(%ebp), %zmm2, %k1
>         +; X32-NEXT:    kunpckwd %k0, %k1, %k1
>         +; X32-NEXT:    vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1}
>         +; X32-NEXT:    kmovd %k0, %eax
>         +; X32-NEXT:    movl %ebp, %esp
>         +; X32-NEXT:    popl %ebp
>         +; X32-NEXT:    vzeroupper
>         +; X32-NEXT:    retl
>         +;
>         +; X64-LABEL: test_mm512_kunpackw:
>         +; X64:       # %bb.0: # %entry
>         +; X64-NEXT:    vpcmpneqw %zmm0, %zmm1, %k0
>         +; X64-NEXT:    vpcmpneqw %zmm3, %zmm2, %k1
>         +; X64-NEXT:    kunpckwd %k0, %k1, %k1
>         +; X64-NEXT:    vpcmpneqw %zmm5, %zmm4, %k0 {%k1}
>         +; X64-NEXT:    kmovd %k0, %eax
>         +; X64-NEXT:    vzeroupper
>         +; X64-NEXT:    retq
>         +entry:
>         +  %0 = bitcast <8 x i64> %__B to <32 x i16>
>         +  %1 = bitcast <8 x i64> %__A to <32 x i16>
>         +  %2 = icmp ne <32 x i16> %0, %1
>         +  %3 = bitcast <32 x i1> %2 to i32
>         +  %4 = bitcast <8 x i64> %__C to <32 x i16>
>         +  %5 = bitcast <8 x i64> %__D to <32 x i16>
>         +  %6 = icmp ne <32 x i16> %4, %5
>         +  %7 = bitcast <32 x i1> %6 to i32
>         +  %and.i = and i32 %7, 65535
>         +  %shl.i = shl i32 %3, 16
>         +  %or.i = or i32 %and.i, %shl.i
>         +  %8 = bitcast <8 x i64> %__E to <32 x i16>
>         +  %9 = bitcast <8 x i64> %__F to <32 x i16>
>         +  %10 = icmp ne <32 x i16> %8, %9
>         +  %11 = bitcast i32 %or.i to <32 x i1>
>         +  %12 = and <32 x i1> %10, %11
>         +  %13 = bitcast <32 x i1> %12 to i32
>         +  ret i32 %13
>         +}
>         +
>         +
>            define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O,
>         i64 %__M, i8 signext %__A)  {
>            ; X32-LABEL: test_mm512_mask_set1_epi8:
>            ; X32:       # %bb.0: # %entry
>         @@ -694,13 +805,13 @@ define <8 x i64> @test_mm512_mask_set1_e
>            ; X32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
>            ; X32-NEXT:    vmovdqa {{.*#+}} ymm5 =
>         [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
>            ; X32-NEXT:    vpblendvb %ymm5, %ymm1, %ymm2, %ymm1
>         -; X32-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
>         -; X32-NEXT:    vpmovb2m %zmm0, %k0
>         -; X32-NEXT:    vpmovm2b %k0, %zmm0
>         -; X32-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
>            ; X32-NEXT:    movl %eax, %ecx
>            ; X32-NEXT:    shrl $30, %ecx
>            ; X32-NEXT:    kmovd %ecx, %k0
>         +; X32-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
>         +; X32-NEXT:    vpmovb2m %zmm0, %k1
>         +; X32-NEXT:    vpmovm2b %k1, %zmm0
>         +; X32-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
>            ; X32-NEXT:    vpmovm2b %k0, %zmm2
>            ; X32-NEXT:    vpbroadcastw %xmm2, %xmm2
>            ; X32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
>         @@ -1422,13 +1533,13 @@ define <8 x i64> @test_mm512_maskz_set1_
>            ; X32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
>            ; X32-NEXT:    vmovdqa {{.*#+}} ymm4 =
>         [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
>            ; X32-NEXT:    vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
>         -; X32-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
>         -; X32-NEXT:    vpmovb2m %zmm0, %k0
>         -; X32-NEXT:    vpmovm2b %k0, %zmm0
>         -; X32-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
>            ; X32-NEXT:    movl %eax, %ecx
>            ; X32-NEXT:    shrl $30, %ecx
>            ; X32-NEXT:    kmovd %ecx, %k0
>         +; X32-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
>         +; X32-NEXT:    vpmovb2m %zmm0, %k1
>         +; X32-NEXT:    vpmovm2b %k1, %zmm0
>         +; X32-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
>            ; X32-NEXT:    vpmovm2b %k0, %zmm2
>            ; X32-NEXT:    vpbroadcastw %xmm2, %xmm2
>            ; X32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
> 
>         Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>         (original)
>         +++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>         Tue Dec  5 07:42:56 2017
>         @@ -2,6 +2,45 @@
>            ; RUN: llc < %s -mtriple=x86_64-apple-darwin
>         -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL
>         --check-prefix=AVX512BW
>            ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu
>         -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL
>         --check-prefix=AVX512F-32
>            +declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
>         +
>         +define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
>         +; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
>         +; AVX512BW:       ## %bb.0:
>         +; AVX512BW-NEXT:    movzwl %di, %eax
>         +; AVX512BW-NEXT:    shll $16, %esi
>         +; AVX512BW-NEXT:    orl %esi, %eax
>         +; AVX512BW-NEXT:    retq
>         +;
>         +; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
>         +; AVX512F-32:       # %bb.0:
>         +; AVX512F-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
>         +; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
>         +; AVX512F-32-NEXT:    shll $16, %eax
>         +; AVX512F-32-NEXT:    orl %ecx, %eax
>         +; AVX512F-32-NEXT:    retl
>         +  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
>         +  ret i32 %res
>         +}
>         +
>         +declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
>         +
>         +define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
>         +; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
>         +; AVX512BW:       ## %bb.0:
>         +; AVX512BW-NEXT:    shlq $32, %rsi
>         +; AVX512BW-NEXT:    movq %rsi, %rax
>         +; AVX512BW-NEXT:    retq
>         +;
>         +; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
>         +; AVX512F-32:       # %bb.0:
>         +; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
>         +; AVX512F-32-NEXT:    xorl %eax, %eax
>         +; AVX512F-32-NEXT:    retl
>         +  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
>         +  ret i64 %res
>         +}
>         +
>            declare <64 x i8>
>         @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
>                define <64 x
>         i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x
>         i8> %x1, i64 %mask) {
> 
>         Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
>         URL:
>         http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=319778&r1=319777&r2=319778&view=diff
>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=319778&r1=319777&r2=319778&view=diff>
>         ==============================================================================
>         --- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
>         +++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Tue Dec 
>         5 07:42:56 2017
>         @@ -1455,55 +1455,6 @@ define  <8 x i64>@test_int_x86_avx512_ma
>              ret  <8 x i64> %res2
>            }
>            -declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
>         -
>         -define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
>         -; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
>         -; AVX512BW:       ## %bb.0:
>         -; AVX512BW-NEXT:    kmovd %edi, %k0
>         -; AVX512BW-NEXT:    kmovd %esi, %k1
>         -; AVX512BW-NEXT:    kunpckwd %k1, %k0, %k0
>         -; AVX512BW-NEXT:    kmovd %k0, %eax
>         -; AVX512BW-NEXT:    retq
>         -;
>         -; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
>         -; AVX512F-32:       # %bb.0:
>         -; AVX512F-32-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
>         -; AVX512F-32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
>         -; AVX512F-32-NEXT:    kunpckwd %k0, %k1, %k0
>         -; AVX512F-32-NEXT:    kmovd %k0, %eax
>         -; AVX512F-32-NEXT:    retl
>         -  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
>         -  ret i32 %res
>         -}
>         -
>         -declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
>         -
>         -define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
>         -; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
>         -; AVX512BW:       ## %bb.0:
>         -; AVX512BW-NEXT:    kmovq %rdi, %k0
>         -; AVX512BW-NEXT:    kmovq %rsi, %k1
>         -; AVX512BW-NEXT:    kunpckdq %k1, %k0, %k0
>         -; AVX512BW-NEXT:    kmovq %k0, %rax
>         -; AVX512BW-NEXT:    retq
>         -;
>         -; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
>         -; AVX512F-32:       # %bb.0:
>         -; AVX512F-32-NEXT:    subl $12, %esp
>         -; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
>         -; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
>         -; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
>         -; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k0
>         -; AVX512F-32-NEXT:    kmovq %k0, (%esp)
>         -; AVX512F-32-NEXT:    movl (%esp), %eax
>         -; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
>         -; AVX512F-32-NEXT:    addl $12, %esp
>         -; AVX512F-32-NEXT:    retl
>         -  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
>         -  ret i64 %res
>         -}
>         -
>            declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
>              define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
> 
> 
>         _______________________________________________
>         llvm-commits mailing list
>         llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>         http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>         <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
> 
> 
> 
>     -- 
> 
>     -Bill Seurer
> 
>     _______________________________________________
>     llvm-commits mailing list
>     llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>     <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
> 
> 


-- 

-Bill Seurer



More information about the llvm-commits mailing list