[llvm] r324580 - [X86] Support folding in a k-register OR when creating KORTEST from scalar compare of a bitcast from vXi1.

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 8 05:12:04 PST 2018


On Thu, Feb 8, 2018 at 11:29 AM, Craig Topper via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: ctopper
> Date: Thu Feb  8 00:29:43 2018
> New Revision: 324580
>
> URL: http://llvm.org/viewvc/llvm-project?rev=324580&view=rev
> Log:
> [X86] Support folding in a k-register OR when creating KORTEST from scalar compare of a bitcast from vXi1.
>
> This should allow us to remove the kortest intrinsic from IR and use compare+bitcast+or in IR instead.
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
>     llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324580&r1=324579&r2=324580&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb  8 00:29:43 2018
> @@ -18149,7 +18149,15 @@ static SDValue EmitKTEST(SDValue Op0, SD
>    } else
>      return SDValue();
>
> -  SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, Op0, Op0);
> +  // If the input is an OR, we can combine it's operands into the KORTEST.
> +  SDValue LHS = Op0;
> +  SDValue RHS = Op0;

> +  if (Op0.getOpcode() == ISD::OR && Op0.hasOneUse() && Op0.hasOneUse()) {
That doesn't look right.

> +    LHS = Op0.getOperand(0);
> +    RHS = Op0.getOperand(1);
> +  }
> +
> +  SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
>    return getSETCC(X86CC, KORTEST, dl, DAG);
>  }
>
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324580&r1=324579&r2=324580&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Feb  8 00:29:43 2018
> @@ -583,8 +583,7 @@ define void @test7(<8 x i1> %mask)  {
>  ; SKX-NEXT:    vpmovw2m %xmm0, %k0
>  ; SKX-NEXT:    movb $85, %al
>  ; SKX-NEXT:    kmovd %eax, %k1
> -; SKX-NEXT:    korb %k1, %k0, %k0
> -; SKX-NEXT:    kortestb %k0, %k0
> +; SKX-NEXT:    kortestb %k1, %k0
>  ; SKX-NEXT:    retq
>  ;
>  ; AVX512BW-LABEL: test7:
> @@ -606,8 +605,7 @@ define void @test7(<8 x i1> %mask)  {
>  ; AVX512DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
>  ; AVX512DQ-NEXT:    movb $85, %al
>  ; AVX512DQ-NEXT:    kmovw %eax, %k1
> -; AVX512DQ-NEXT:    korb %k1, %k0, %k0
> -; AVX512DQ-NEXT:    kortestb %k0, %k0
> +; AVX512DQ-NEXT:    kortestb %k1, %k0
>  ; AVX512DQ-NEXT:    vzeroupper
>  ; AVX512DQ-NEXT:    retq
>  allocas:
> @@ -1787,8 +1785,7 @@ define void @ktest_2(<32 x float> %in, f
>  ; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1
>  ; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
>  ; SKX-NEXT:    kunpckwd %k1, %k2, %k1
> -; SKX-NEXT:    kord %k1, %k0, %k0
> -; SKX-NEXT:    kortestd %k0, %k0
> +; SKX-NEXT:    kortestd %k1, %k0
>  ; SKX-NEXT:    je LBB43_2
>  ; SKX-NEXT:  ## %bb.1: ## %L1
>  ; SKX-NEXT:    vmovaps %zmm0, (%rdi)
> @@ -1813,8 +1810,7 @@ define void @ktest_2(<32 x float> %in, f
>  ; AVX512BW-NEXT:    vcmpltps %zmm3, %zmm0, %k1
>  ; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm1, %k2
>  ; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k1
> -; AVX512BW-NEXT:    kord %k1, %k0, %k0
> -; AVX512BW-NEXT:    kortestd %k0, %k0
> +; AVX512BW-NEXT:    kortestd %k1, %k0
>  ; AVX512BW-NEXT:    je LBB43_2
>  ; AVX512BW-NEXT:  ## %bb.1: ## %L1
>  ; AVX512BW-NEXT:    vmovaps %zmm0, (%rdi)
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=324580&r1=324579&r2=324580&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Thu Feb  8 00:29:43 2018
> @@ -7030,8 +7030,7 @@ define void @vcmp_test7(<8 x i1> %mask)
>  ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
>  ; GENERIC-NEXT:    movb $85, %al # sched: [1:0.33]
>  ; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
> -; GENERIC-NEXT:    korb %k1, %k0, %k0 # sched: [1:1.00]
> -; GENERIC-NEXT:    kortestb %k0, %k0 # sched: [1:1.00]
> +; GENERIC-NEXT:    kortestb %k1, %k0 # sched: [1:1.00]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
>  ; SKX-LABEL: vcmp_test7:
> @@ -7040,8 +7039,7 @@ define void @vcmp_test7(<8 x i1> %mask)
>  ; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
>  ; SKX-NEXT:    movb $85, %al # sched: [1:0.25]
>  ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
> -; SKX-NEXT:    korb %k1, %k0, %k0 # sched: [1:1.00]
> -; SKX-NEXT:    kortestb %k0, %k0 # sched: [3:1.00]
> +; SKX-NEXT:    kortestb %k1, %k0 # sched: [3:1.00]
>  ; SKX-NEXT:    retq # sched: [7:1.00]
>  allocas:
>    %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
> @@ -7683,8 +7681,7 @@ define void @ktest_2(<32 x float> %in, f
>  ; GENERIC-NEXT:    vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
>  ; GENERIC-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
>  ; GENERIC-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
> -; GENERIC-NEXT:    kord %k1, %k0, %k0 # sched: [1:1.00]
> -; GENERIC-NEXT:    kortestd %k0, %k0 # sched: [1:1.00]
> +; GENERIC-NEXT:    kortestd %k1, %k0 # sched: [1:1.00]
>  ; GENERIC-NEXT:    je .LBB411_2 # sched: [1:1.00]
>  ; GENERIC-NEXT:  # %bb.1: # %L1
>  ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
> @@ -7703,14 +7700,13 @@ define void @ktest_2(<32 x float> %in, f
>  ; SKX-NEXT:    vmovups 64(%rdi), %zmm3 # sched: [8:0.50]
>  ; SKX-NEXT:    vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
>  ; SKX-NEXT:    vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
> +; SKX-NEXT:    kunpckwd %k1, %k2, %k0 # sched: [3:1.00]
>  ; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50]
>  ; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50]
> -; SKX-NEXT:    kunpckwd %k1, %k2, %k0 # sched: [3:1.00]
>  ; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
>  ; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
>  ; SKX-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
> -; SKX-NEXT:    kord %k1, %k0, %k0 # sched: [1:1.00]
> -; SKX-NEXT:    kortestd %k0, %k0 # sched: [3:1.00]
> +; SKX-NEXT:    kortestd %k1, %k0 # sched: [3:1.00]
>  ; SKX-NEXT:    je .LBB411_2 # sched: [1:0.50]
>  ; SKX-NEXT:  # %bb.1: # %L1
>  ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list