[llvm] r319778 - [x86][AVX512] Lowering kunpack intrinsics to LLVM IR
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 6 09:56:42 PST 2017
I've attempted a fix for at least the ubsan failure in r319911.
~Craig
On Wed, Dec 6, 2017 at 9:49 AM, Bill Seurer via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> This is still causing failures on all the powerpc64 buildbots:
>
> ******************** TEST 'LLVM :: CodeGen/X86/avx512bw-intrinsics-upgrade.ll'
> FAILED ********************
> Script:
> --
> /home/buildbots/ppc64be-clang-test/clang-ppc64be/stage1/bin/llc <
> /home/buildbots/ppc64be-clang-test/clang-ppc64be/llvm/test/C
> odeGen/X86/avx512bw-intrinsics-upgrade.ll -mtriple=x86_64-apple-darwin
> -mattr=+avx512f,+avx512bw | /home/buildbots/ppc64be-clang-
> test/clang-ppc64be/stage1/bin/FileCheck /home/buildbots/ppc64be-clang-
> test/clang-ppc64be/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
> --check-prefix=ALL --check-prefix=AVX512BW
> /home/buildbots/ppc64be-clang-test/clang-ppc64be/stage1/bin/llc <
> /home/buildbots/ppc64be-clang-test/clang-ppc64be/llvm/test/C
> odeGen/X86/avx512bw-intrinsics-upgrade.ll -mtriple=i386-unknown-linux-gnu
> -mattr=+avx512f,+avx512bw | /home/buildbots/ppc64be-clang-
> test/clang-ppc64be/stage1/bin/FileCheck /home/buildbots/ppc64be-clang-
> test/clang-ppc64be/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
> --check-prefix=ALL --check-prefix=AVX512F-32
> --
> Exit Code: 1
>
> Command Output (stderr):
> --
> /home/buildbots/ppc64be-clang-test/clang-ppc64be/llvm/test/C
> odeGen/X86/avx512bw-intrinsics-upgrade.ll:32:18: error: AVX512BW-NEXT: is
> not on the line after the previous match
> ; AVX512BW-NEXT: movq %rsi, %rax
> ^
> <stdin>:20:2: note: 'next' match was here
> movq %rsi, %rax
> ^
> <stdin>:18:16: note: previous match ended here
> shlq $32, %rsi
> ^
> <stdin>:19:1: note: non-matching line after previous match is here
> orq %rdi, %rsi
> ^
>
> --
>
> ********************
>
>
> On 12/05/2017 09:42 AM, Jina Nahias via llvm-commits wrote:
>
>> Author: jina.nahias
>> Date: Tue Dec 5 07:42:56 2017
>> New Revision: 319778
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=319778&view=rev
>> Log:
>> [x86][AVX512] Lowering kunpack intrinsics to LLVM IR
>>
>> This patch, together with a matching clang patch (
>> https://reviews.llvm.org/D39719), implements the lowering of X86 kunpack
>> intrinsics to IR.
>>
>> Differential Revision: https://reviews.llvm.org/D39720
>>
>> Change-Id: I4088d9428478f9457f6afddc90bd3d66b3daf0a1
>>
>> Modified:
>> llvm/trunk/include/llvm/IR/IntrinsicsX86.td
>> llvm/trunk/lib/IR/AutoUpgrade.cpp
>> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>> llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
>> llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
>> llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
>> llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
>> llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>> llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
>>
>> Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/
>> IR/IntrinsicsX86.td?rev=319778&r1=319777&r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
>> +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Dec 5 07:42:56 2017
>> @@ -3738,15 +3738,6 @@ let TargetPrefix = "x86" in { // All in
>> def int_x86_avx512_kxnor_w : GCCBuiltin<"__builtin_ia32_kxnorhi">,
>> Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
>> [IntrNoMem]>;
>> - def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
>> - Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
>> - [IntrNoMem]>;
>> - def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">,
>> - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
>> - [IntrNoMem]>;
>> - def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">,
>> - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
>> - [IntrNoMem]>;
>> def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kor
>> testzhi">,
>> Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
>> [IntrNoMem]>;
>>
>> Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUp
>> grade.cpp?rev=319778&r1=319777&r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
>> +++ llvm/trunk/lib/IR/AutoUpgrade.cpp Tue Dec 5 07:42:56 2017
>> @@ -78,6 +78,7 @@ static bool ShouldUpgradeX86Intrinsic(Fu
>> Name=="ssse3.pabs.d.128" || // Added in 6.0
>> Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
>> Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
>> + Name.startswith("avx512.kunpck") || //added in 6.0
>> Name.startswith("avx2.pabs.") || // Added in 6.0
>> Name.startswith("avx512.mask.pabs.") || // Added in 6.0
>> Name.startswith("avx512.broadcastm") || // Added in 6.0
>> @@ -1065,6 +1066,12 @@ void llvm::UpgradeIntrinsicCall(CallInst
>> Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
>> Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
>> CI->getArgOperand(1));
>> + } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
>> + uint64_t Shift = CI->getType()->getScalarSizeInBits() / 2;
>> + uint64_t And = (1 << Shift) - 1;
>> + Value* LowBits = Builder.CreateAnd(CI->getArgOperand(0), And);
>> + Value* HighBits = Builder.CreateShl(CI->getArgOperand(1), Shift);
>> + Rep = Builder.CreateOr(LowBits, HighBits);
>> } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd"))
>> {
>> Type *I32Ty = Type::getInt32Ty(C);
>> Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X8
>> 6/X86ISelLowering.cpp?rev=319778&r1=319777&r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 5 07:42:56
>> 2017
>> @@ -30007,6 +30007,53 @@ static SDValue combineBitcastvxi1(Select
>> SDValue N0 = BitCast.getOperand(0);
>> EVT VecVT = N0->getValueType(0);
>> + if (VT.isVector() && VecVT.isScalarInteger() &&
>> Subtarget.hasAVX512() &&
>> + N0->getOpcode() == ISD::OR) {
>> + SDValue Op0 = N0->getOperand(0);
>> + SDValue Op1 = N0->getOperand(1);
>> + MVT TrunckVT;
>> + MVT BitcastVT;
>> + switch (VT.getSimpleVT().SimpleTy) {
>> + default:
>> + return SDValue();
>> + case MVT::v16i1:
>> + TrunckVT = MVT::i8;
>> + BitcastVT = MVT::v8i1;
>> + break;
>> + case MVT::v32i1:
>> + TrunckVT = MVT::i16;
>> + BitcastVT = MVT::v16i1;
>> + break;
>> + case MVT::v64i1:
>> + TrunckVT = MVT::i32;
>> + BitcastVT = MVT::v32i1;
>> + break;
>> + }
>> + bool isArg0UndefRight = Op0->getOpcode() == ISD::SHL;
>> + bool isArg0UndefLeft =
>> + Op0->getOpcode() == ISD::ZERO_EXTEND || Op0->getOpcode() ==
>> ISD::AND;
>> + bool isArg1UndefRight = Op1->getOpcode() == ISD::SHL;
>> + bool isArg1UndefLeft =
>> + Op1->getOpcode() == ISD::ZERO_EXTEND || Op1->getOpcode() ==
>> ISD::AND;
>> + SDValue OpLeft;
>> + SDValue OpRight;
>> + if (isArg0UndefRight && isArg1UndefLeft) {
>> + OpLeft = Op0;
>> + OpRight = Op1;
>> + } else if (isArg1UndefRight && isArg0UndefLeft) {
>> + OpLeft = Op1;
>> + OpRight = Op0;
>> + } else
>> + return SDValue();
>> + SDLoc DL(BitCast);
>> + SDValue Shr = OpLeft->getOperand(0);
>> + SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, Shr);
>> + SDValue Bitcast1 = DAG.getBitcast(BitcastVT, Trunc1);
>> + SDValue Trunc2 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, OpRight);
>> + SDValue Bitcast2 = DAG.getBitcast(BitcastVT, Trunc2);
>> + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Bitcast1, Bitcast2);
>> + }
>> +
>> if (!VT.isScalarInteger() || !VecVT.isSimple())
>> return SDValue();
>>
>> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X8
>> 6/X86IntrinsicsInfo.h?rev=319778&r1=319777&r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Dec 5 07:42:56
>> 2017
>> @@ -479,9 +479,6 @@ static const IntrinsicData IntrinsicsWi
>> X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,
>> X86ISD::EXP2, 0),
>> X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
>> X86_INTRINSIC_DATA(avx512_kor_w, MASK_BINOP, ISD::OR, 0),
>> - X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0),
>> - X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
>> - X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
>> X86_INTRINSIC_DATA(avx512_kxor_w, MASK_BINOP, ISD::XOR, 0),
>> X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK,
>> ISD::FADD,
>> X86ISD::FADD_RND),
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> X86/avx512-intrinsics-fast-isel.ll?rev=319778&r1=319777&
>> r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Tue Dec
>> 5 07:42:56 2017
>> @@ -5,6 +5,59 @@
>> ; NOTE: This should use IR equivalent to what is generated by
>> clang/test/CodeGen/avx512f-builtins.c
>> +define zeroext i16 @test_mm512_kunpackb(<8 x i64> %__A, <8 x i64>
>> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F)
>> local_unnamed_addr #0 {
>> +; X32-LABEL: test_mm512_kunpackb:
>> +; X32: # %bb.0: # %entry
>> +; X32-NEXT: pushl %ebp
>> +; X32-NEXT: .cfi_def_cfa_offset 8
>> +; X32-NEXT: .cfi_offset %ebp, -8
>> +; X32-NEXT: movl %esp, %ebp
>> +; X32-NEXT: .cfi_def_cfa_register %ebp
>> +; X32-NEXT: andl $-64, %esp
>> +; X32-NEXT: subl $64, %esp
>> +; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
>> +; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
>> +; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
>> +; X32-NEXT: kunpckbw %k0, %k1, %k1
>> +; X32-NEXT: vpcmpneqd 72(%ebp), %zmm3, %k0 {%k1}
>> +; X32-NEXT: kmovw %k0, %eax
>> +; X32-NEXT: movzwl %ax, %eax
>> +; X32-NEXT: movl %ebp, %esp
>> +; X32-NEXT: popl %ebp
>> +; X32-NEXT: vzeroupper
>> +; X32-NEXT: retl
>> +;
>> +; X64-LABEL: test_mm512_kunpackb:
>> +; X64: # %bb.0: # %entry
>> +; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
>> +; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
>> +; X64-NEXT: kunpckbw %k0, %k1, %k1
>> +; X64-NEXT: vpcmpneqd %zmm5, %zmm4, %k0 {%k1}
>> +; X64-NEXT: kmovw %k0, %eax
>> +; X64-NEXT: movzwl %ax, %eax
>> +; X64-NEXT: vzeroupper
>> +; X64-NEXT: retq
>> +entry:
>> + %0 = bitcast <8 x i64> %__A to <16 x i32>
>> + %1 = bitcast <8 x i64> %__B to <16 x i32>
>> + %2 = icmp ne <16 x i32> %0, %1
>> + %3 = bitcast <16 x i1> %2 to i16
>> + %4 = bitcast <8 x i64> %__C to <16 x i32>
>> + %5 = bitcast <8 x i64> %__D to <16 x i32>
>> + %6 = icmp ne <16 x i32> %4, %5
>> + %7 = bitcast <16 x i1> %6 to i16
>> + %8 = and i16 %7, 255
>> + %shl.i = shl i16 %3, 8
>> + %or.i = or i16 %8, %shl.i
>> + %9 = bitcast <8 x i64> %__E to <16 x i32>
>> + %10 = bitcast <8 x i64> %__F to <16 x i32>
>> + %11 = icmp ne <16 x i32> %9, %10
>> + %12 = bitcast i16 %or.i to <16 x i1>
>> + %13 = and <16 x i1> %11, %12
>> + %14 = bitcast <16 x i1> %13 to i16
>> + ret i16 %14
>> +}
>> +
>> define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x
>> float> %__B) {
>> ; X32-LABEL: test_mm512_shuffle_f32x4:
>> ; X32: # %bb.0: # %entry
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> X86/avx512-intrinsics-upgrade.ll?rev=319778&r1=319777&r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Tue Dec 5
>> 07:42:56 2017
>> @@ -1,7 +1,21 @@
>> ; NOTE: Assertions have been autogenerated by
>> utils/update_llc_test_checks.py
>> ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
>> - define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32
>> %x0, <16 x i32> %x1, i16 %mask) {
>> +declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
>> +
>> +define i16 @unpckbw_test(i16 %a0, i16 %a1) {
>> +; CHECK-LABEL: unpckbw_test:
>> +; CHECK: ## %bb.0:
>> +; CHECK-NEXT: movzbl %dil, %eax
>> +; CHECK-NEXT: shll $8, %esi
>> +; CHECK-NEXT: orl %esi, %eax
>> +; CHECK-NEXT: ## kill: %ax<def> %ax<kill> %eax<kill>
>> +; CHECK-NEXT: retq
>> + %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
>> + ret i16 %res
>> +}
>> +
>> +define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0,
>> <16 x i32> %x1, i16 %mask) {
>> ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
>> ; CHECK: ## %bb.0:
>> ; CHECK-NEXT: vpbroadcastd %edi, %zmm1
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> X86/avx512-intrinsics.ll?rev=319778&r1=319777&r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Tue Dec 5 07:42:56
>> 2017
>> @@ -96,21 +96,6 @@ define i16 @test_kor(i16 %a0, i16 %a1) {
>> ret i16 %t2
>> }
>> -declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
>> -
>> -define i16 @unpckbw_test(i16 %a0, i16 %a1) {
>> -; CHECK-LABEL: unpckbw_test:
>> -; CHECK: ## %bb.0:
>> -; CHECK-NEXT: kmovw %edi, %k0
>> -; CHECK-NEXT: kmovw %esi, %k1
>> -; CHECK-NEXT: kunpckbw %k1, %k0, %k0
>> -; CHECK-NEXT: kmovw %k0, %eax
>> -; CHECK-NEXT: ## kill: %ax<def> %ax<kill> %eax<kill>
>> -; CHECK-NEXT: retq
>> - %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
>> - ret i16 %res
>> -}
>> -
>> declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
>> ; TODO: the two kxnor instructions here a no op and should be
>> elimintaed,
>> ; probably by FoldConstantArithmetic in SelectionDAG.
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> X86/avx512bw-intrinsics-fast-isel.ll?rev=319778&r1=319777&
>> r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
>> (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Tue
>> Dec 5 07:42:56 2017
>> @@ -4,6 +4,117 @@
>> ; NOTE: This should use IR equivalent to what is generated by
>> clang/test/CodeGen/avx512bw-builtins.c
>> +define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x
>> i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
>> +; X32-LABEL: test_mm512_kunpackd:
>> +; X32: # %bb.0: # %entry
>> +; X32-NEXT: pushl %ebp
>> +; X32-NEXT: .cfi_def_cfa_offset 8
>> +; X32-NEXT: .cfi_offset %ebp, -8
>> +; X32-NEXT: movl %esp, %ebp
>> +; X32-NEXT: .cfi_def_cfa_register %ebp
>> +; X32-NEXT: andl $-64, %esp
>> +; X32-NEXT: subl $64, %esp
>> +; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
>> +; X32-NEXT: vmovdqa64 72(%ebp), %zmm4
>> +; X32-NEXT: vmovdqa64 8(%ebp), %zmm5
>> +; X32-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
>> +; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
>> +; X32-NEXT: vpcmpneqb %zmm5, %zmm2, %k0
>> +; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
>> +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
>> +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
>> +; X32-NEXT: kunpckdq %k0, %k1, %k1
>> +; X32-NEXT: vpcmpneqb %zmm3, %zmm4, %k0 {%k1}
>> +; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
>> +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
>> +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
>> +; X32-NEXT: movl %ebp, %esp
>> +; X32-NEXT: popl %ebp
>> +; X32-NEXT: vzeroupper
>> +; X32-NEXT: retl
>> +;
>> +; X64-LABEL: test_mm512_kunpackd:
>> +; X64: # %bb.0: # %entry
>> +; X64-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
>> +; X64-NEXT: vpcmpneqb %zmm3, %zmm2, %k1
>> +; X64-NEXT: kunpckdq %k0, %k1, %k1
>> +; X64-NEXT: vpcmpneqb %zmm5, %zmm4, %k0 {%k1}
>> +; X64-NEXT: kmovq %k0, %rax
>> +; X64-NEXT: vzeroupper
>> +; X64-NEXT: retq
>> +entry:
>> + %0 = bitcast <8 x i64> %__B to <64 x i8>
>> + %1 = bitcast <8 x i64> %__A to <64 x i8>
>> + %2 = icmp ne <64 x i8> %0, %1
>> + %3 = bitcast <64 x i1> %2 to i64
>> + %4 = bitcast <8 x i64> %__C to <64 x i8>
>> + %5 = bitcast <8 x i64> %__D to <64 x i8>
>> + %6 = icmp ne <64 x i8> %4, %5
>> + %7 = bitcast <64 x i1> %6 to i64
>> + %and.i = and i64 %7, 4294967295
>> + %shl.i = shl i64 %3, 32
>> + %or.i = or i64 %and.i, %shl.i
>> + %8 = bitcast <8 x i64> %__E to <64 x i8>
>> + %9 = bitcast <8 x i64> %__F to <64 x i8>
>> + %10 = icmp ne <64 x i8> %8, %9
>> + %11 = bitcast i64 %or.i to <64 x i1>
>> + %12 = and <64 x i1> %10, %11
>> + %13 = bitcast <64 x i1> %12 to i64
>> + ret i64 %13
>> +}
>> +
>> +define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B, <8 x
>> i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
>> +; X32-LABEL: test_mm512_kunpackw:
>> +; X32: # %bb.0: # %entry
>> +; X32-NEXT: pushl %ebp
>> +; X32-NEXT: .cfi_def_cfa_offset 8
>> +; X32-NEXT: .cfi_offset %ebp, -8
>> +; X32-NEXT: movl %esp, %ebp
>> +; X32-NEXT: .cfi_def_cfa_register %ebp
>> +; X32-NEXT: andl $-64, %esp
>> +; X32-NEXT: subl $64, %esp
>> +; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
>> +; X32-NEXT: vpcmpneqw %zmm0, %zmm1, %k0
>> +; X32-NEXT: vpcmpneqw 8(%ebp), %zmm2, %k1
>> +; X32-NEXT: kunpckwd %k0, %k1, %k1
>> +; X32-NEXT: vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1}
>> +; X32-NEXT: kmovd %k0, %eax
>> +; X32-NEXT: movl %ebp, %esp
>> +; X32-NEXT: popl %ebp
>> +; X32-NEXT: vzeroupper
>> +; X32-NEXT: retl
>> +;
>> +; X64-LABEL: test_mm512_kunpackw:
>> +; X64: # %bb.0: # %entry
>> +; X64-NEXT: vpcmpneqw %zmm0, %zmm1, %k0
>> +; X64-NEXT: vpcmpneqw %zmm3, %zmm2, %k1
>> +; X64-NEXT: kunpckwd %k0, %k1, %k1
>> +; X64-NEXT: vpcmpneqw %zmm5, %zmm4, %k0 {%k1}
>> +; X64-NEXT: kmovd %k0, %eax
>> +; X64-NEXT: vzeroupper
>> +; X64-NEXT: retq
>> +entry:
>> + %0 = bitcast <8 x i64> %__B to <32 x i16>
>> + %1 = bitcast <8 x i64> %__A to <32 x i16>
>> + %2 = icmp ne <32 x i16> %0, %1
>> + %3 = bitcast <32 x i1> %2 to i32
>> + %4 = bitcast <8 x i64> %__C to <32 x i16>
>> + %5 = bitcast <8 x i64> %__D to <32 x i16>
>> + %6 = icmp ne <32 x i16> %4, %5
>> + %7 = bitcast <32 x i1> %6 to i32
>> + %and.i = and i32 %7, 65535
>> + %shl.i = shl i32 %3, 16
>> + %or.i = or i32 %and.i, %shl.i
>> + %8 = bitcast <8 x i64> %__E to <32 x i16>
>> + %9 = bitcast <8 x i64> %__F to <32 x i16>
>> + %10 = icmp ne <32 x i16> %8, %9
>> + %11 = bitcast i32 %or.i to <32 x i1>
>> + %12 = and <32 x i1> %10, %11
>> + %13 = bitcast <32 x i1> %12 to i32
>> + ret i32 %13
>> +}
>> +
>> +
>> define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M,
>> i8 signext %__A) {
>> ; X32-LABEL: test_mm512_mask_set1_epi8:
>> ; X32: # %bb.0: # %entry
>> @@ -694,13 +805,13 @@ define <8 x i64> @test_mm512_mask_set1_e
>> ; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
>> ; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,2
>> 55,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
>> 255,255,255,255,255,255,255,0,255,255]
>> ; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm2, %ymm1
>> -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
>> -; X32-NEXT: vpmovb2m %zmm0, %k0
>> -; X32-NEXT: vpmovm2b %k0, %zmm0
>> -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
>> ; X32-NEXT: movl %eax, %ecx
>> ; X32-NEXT: shrl $30, %ecx
>> ; X32-NEXT: kmovd %ecx, %k0
>> +; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
>> +; X32-NEXT: vpmovb2m %zmm0, %k1
>> +; X32-NEXT: vpmovm2b %k1, %zmm0
>> +; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
>> ; X32-NEXT: vpmovm2b %k0, %zmm2
>> ; X32-NEXT: vpbroadcastw %xmm2, %xmm2
>> ; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
>> @@ -1422,13 +1533,13 @@ define <8 x i64> @test_mm512_maskz_set1_
>> ; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
>> ; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,2
>> 55,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
>> 255,255,255,255,255,255,255,0,255,255]
>> ; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
>> -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
>> -; X32-NEXT: vpmovb2m %zmm0, %k0
>> -; X32-NEXT: vpmovm2b %k0, %zmm0
>> -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
>> ; X32-NEXT: movl %eax, %ecx
>> ; X32-NEXT: shrl $30, %ecx
>> ; X32-NEXT: kmovd %ecx, %k0
>> +; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
>> +; X32-NEXT: vpmovb2m %zmm0, %k1
>> +; X32-NEXT: vpmovm2b %k1, %zmm0
>> +; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
>> ; X32-NEXT: vpmovm2b %k0, %zmm2
>> ; X32-NEXT: vpbroadcastw %xmm2, %xmm2
>> ; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> X86/avx512bw-intrinsics-upgrade.ll?rev=319778&r1=
>> 319777&r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Tue Dec
>> 5 07:42:56 2017
>> @@ -2,6 +2,45 @@
>> ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw
>> | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
>> ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu
>> -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL
>> --check-prefix=AVX512F-32
>> +declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
>> +
>> +define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
>> +; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
>> +; AVX512BW: ## %bb.0:
>> +; AVX512BW-NEXT: movzwl %di, %eax
>> +; AVX512BW-NEXT: shll $16, %esi
>> +; AVX512BW-NEXT: orl %esi, %eax
>> +; AVX512BW-NEXT: retq
>> +;
>> +; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
>> +; AVX512F-32: # %bb.0:
>> +; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
>> +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
>> +; AVX512F-32-NEXT: shll $16, %eax
>> +; AVX512F-32-NEXT: orl %ecx, %eax
>> +; AVX512F-32-NEXT: retl
>> + %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
>> + ret i32 %res
>> +}
>> +
>> +declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
>> +
>> +define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
>> +; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
>> +; AVX512BW: ## %bb.0:
>> +; AVX512BW-NEXT: shlq $32, %rsi
>> +; AVX512BW-NEXT: movq %rsi, %rax
>> +; AVX512BW-NEXT: retq
>> +;
>> +; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
>> +; AVX512F-32: # %bb.0:
>> +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
>> +; AVX512F-32-NEXT: xorl %eax, %eax
>> +; AVX512F-32-NEXT: retl
>> + %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
>> + ret i64 %res
>> +}
>> +
>> declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x
>> i8>, i64)
>> define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8
>> %x0, <64 x i8> %x1, i64 %mask) {
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> X86/avx512bw-intrinsics.ll?rev=319778&r1=319777&r2=319778&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Tue Dec 5
>> 07:42:56 2017
>> @@ -1455,55 +1455,6 @@ define <8 x i64>@test_int_x86_avx512_ma
>> ret <8 x i64> %res2
>> }
>> -declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
>> -
>> -define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
>> -; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
>> -; AVX512BW: ## %bb.0:
>> -; AVX512BW-NEXT: kmovd %edi, %k0
>> -; AVX512BW-NEXT: kmovd %esi, %k1
>> -; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0
>> -; AVX512BW-NEXT: kmovd %k0, %eax
>> -; AVX512BW-NEXT: retq
>> -;
>> -; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
>> -; AVX512F-32: # %bb.0:
>> -; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k0
>> -; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
>> -; AVX512F-32-NEXT: kunpckwd %k0, %k1, %k0
>> -; AVX512F-32-NEXT: kmovd %k0, %eax
>> -; AVX512F-32-NEXT: retl
>> - %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
>> - ret i32 %res
>> -}
>> -
>> -declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
>> -
>> -define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
>> -; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
>> -; AVX512BW: ## %bb.0:
>> -; AVX512BW-NEXT: kmovq %rdi, %k0
>> -; AVX512BW-NEXT: kmovq %rsi, %k1
>> -; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
>> -; AVX512BW-NEXT: kmovq %k0, %rax
>> -; AVX512BW-NEXT: retq
>> -;
>> -; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
>> -; AVX512F-32: # %bb.0:
>> -; AVX512F-32-NEXT: subl $12, %esp
>> -; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
>> -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
>> -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
>> -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
>> -; AVX512F-32-NEXT: kmovq %k0, (%esp)
>> -; AVX512F-32-NEXT: movl (%esp), %eax
>> -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
>> -; AVX512F-32-NEXT: addl $12, %esp
>> -; AVX512F-32-NEXT: retl
>> - %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
>> - ret i64 %res
>> -}
>> -
>> declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
>> define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>>
>
> --
>
> -Bill Seurer
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171206/ee870a0a/attachment.html>
More information about the llvm-commits
mailing list