[llvm] r350287 - [X86] Add test cases for opportunities to use KTEST when checking if the result of ANDing two mask registers is zero.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 2 23:12:54 PST 2019


Author: ctopper
Date: Wed Jan  2 23:12:54 2019
New Revision: 350287

URL: http://llvm.org/viewvc/llvm-project?rev=350287&view=rev
Log:
[X86] Add test cases for opportunities to use KTEST when checking if the result of ANDing two mask registers is zero.

The test cases are constructed to avoid folding the AND into a masked compare operation.

Currently we emit a KAND and a KORTEST for these cases.

Modified:
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=350287&r1=350286&r2=350287&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Jan  2 23:12:54 2019
@@ -3479,3 +3479,729 @@ define void @mask_not_cast(i8*, <8 x i64
   ret void
 }
 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
+
+define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
+; KNL-LABEL: ktest_3:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
+; KNL-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
+; KNL-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
+; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
+; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; KNL-NEXT:    vptestnmd %zmm2, %zmm2, %k2
+; KNL-NEXT:    vptestnmd %zmm3, %zmm3, %k3
+; KNL-NEXT:    korw %k1, %k0, %k0
+; KNL-NEXT:    korw %k3, %k2, %k1
+; KNL-NEXT:    kandw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    testb %al, %al
+; KNL-NEXT:    je LBB71_1
+; KNL-NEXT:  ## %bb.2: ## %exit
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+; KNL-NEXT:  LBB71_1: ## %bar
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    callq _foo
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: ktest_3:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; SKX-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; SKX-NEXT:    korb %k1, %k0, %k0
+; SKX-NEXT:    vptestnmd %ymm2, %ymm2, %k1
+; SKX-NEXT:    vptestnmd %ymm3, %ymm3, %k2
+; SKX-NEXT:    korb %k2, %k1, %k1
+; SKX-NEXT:    kandb %k1, %k0, %k0
+; SKX-NEXT:    kortestb %k0, %k0
+; SKX-NEXT:    je LBB71_1
+; SKX-NEXT:  ## %bb.2: ## %exit
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    retq
+; SKX-NEXT:  LBB71_1: ## %bar
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    callq _foo
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: ktest_3:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
+; AVX512BW-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
+; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT:    vptestnmd %zmm2, %zmm2, %k2
+; AVX512BW-NEXT:    vptestnmd %zmm3, %zmm3, %k3
+; AVX512BW-NEXT:    korw %k1, %k0, %k0
+; AVX512BW-NEXT:    korw %k3, %k2, %k1
+; AVX512BW-NEXT:    kandw %k1, %k0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    je LBB71_1
+; AVX512BW-NEXT:  ## %bb.2: ## %exit
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+; AVX512BW-NEXT:  LBB71_1: ## %bar
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    callq _foo
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: ktest_3:
+; AVX512DQ:       ## %bb.0:
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
+; AVX512DQ-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512DQ-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512DQ-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; AVX512DQ-NEXT:    vptestnmd %zmm2, %zmm2, %k2
+; AVX512DQ-NEXT:    vptestnmd %zmm3, %zmm3, %k3
+; AVX512DQ-NEXT:    korb %k1, %k0, %k0
+; AVX512DQ-NEXT:    korb %k3, %k2, %k1
+; AVX512DQ-NEXT:    kandb %k1, %k0, %k0
+; AVX512DQ-NEXT:    kortestb %k0, %k0
+; AVX512DQ-NEXT:    je LBB71_1
+; AVX512DQ-NEXT:  ## %bb.2: ## %exit
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+; AVX512DQ-NEXT:  LBB71_1: ## %bar
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    callq _foo
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    retq
+;
+; X86-LABEL: ktest_3:
+; X86:       ## %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; X86-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; X86-NEXT:    korb %k1, %k0, %k0
+; X86-NEXT:    vptestnmd %ymm2, %ymm2, %k1
+; X86-NEXT:    vptestnmd %ymm3, %ymm3, %k2
+; X86-NEXT:    korb %k2, %k1, %k1
+; X86-NEXT:    kandb %k1, %k0, %k0
+; X86-NEXT:    kortestb %k0, %k0
+; X86-NEXT:    je LBB71_1
+; X86-NEXT:  ## %bb.2: ## %exit
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+; X86-NEXT:  LBB71_1: ## %bar
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    calll _foo
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+  %a = icmp eq <8 x i32> %w, zeroinitializer
+  %b = icmp eq <8 x i32> %x, zeroinitializer
+  %c = icmp eq <8 x i32> %y, zeroinitializer
+  %d = icmp eq <8 x i32> %z, zeroinitializer
+  %e = or <8 x i1> %a, %b
+  %f = or <8 x i1> %c, %d
+  %g = and <8 x i1> %e, %f
+  %h = bitcast <8 x i1> %g to i8
+  %i = icmp eq i8 %h, 0
+  br i1 %i, label %bar, label %exit
+
+bar:
+  call void @foo()
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
+; KNL-LABEL: ktest_4:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
+; KNL-NEXT:    vptestnmq %zmm2, %zmm2, %k2
+; KNL-NEXT:    vptestnmq %zmm3, %zmm3, %k3
+; KNL-NEXT:    korw %k1, %k0, %k0
+; KNL-NEXT:    korw %k3, %k2, %k1
+; KNL-NEXT:    kandw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    testb %al, %al
+; KNL-NEXT:    je LBB72_1
+; KNL-NEXT:  ## %bb.2: ## %exit
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+; KNL-NEXT:  LBB72_1: ## %bar
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    callq _foo
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: ktest_4:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1
+; SKX-NEXT:    korb %k1, %k0, %k0
+; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1
+; SKX-NEXT:    vptestnmq %zmm3, %zmm3, %k2
+; SKX-NEXT:    korb %k2, %k1, %k1
+; SKX-NEXT:    kandb %k1, %k0, %k0
+; SKX-NEXT:    kortestb %k0, %k0
+; SKX-NEXT:    je LBB72_1
+; SKX-NEXT:  ## %bb.2: ## %exit
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    retq
+; SKX-NEXT:  LBB72_1: ## %bar
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    callq _foo
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: ktest_4:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
+; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmq %zmm1, %zmm1, %k1
+; AVX512BW-NEXT:    vptestnmq %zmm2, %zmm2, %k2
+; AVX512BW-NEXT:    vptestnmq %zmm3, %zmm3, %k3
+; AVX512BW-NEXT:    korw %k1, %k0, %k0
+; AVX512BW-NEXT:    korw %k3, %k2, %k1
+; AVX512BW-NEXT:    kandw %k1, %k0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    je LBB72_1
+; AVX512BW-NEXT:  ## %bb.2: ## %exit
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+; AVX512BW-NEXT:  LBB72_1: ## %bar
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    callq _foo
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: ktest_4:
+; AVX512DQ:       ## %bb.0:
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vptestnmq %zmm1, %zmm1, %k1
+; AVX512DQ-NEXT:    korb %k1, %k0, %k0
+; AVX512DQ-NEXT:    vptestnmq %zmm2, %zmm2, %k1
+; AVX512DQ-NEXT:    vptestnmq %zmm3, %zmm3, %k2
+; AVX512DQ-NEXT:    korb %k2, %k1, %k1
+; AVX512DQ-NEXT:    kandb %k1, %k0, %k0
+; AVX512DQ-NEXT:    kortestb %k0, %k0
+; AVX512DQ-NEXT:    je LBB72_1
+; AVX512DQ-NEXT:  ## %bb.2: ## %exit
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+; AVX512DQ-NEXT:  LBB72_1: ## %bar
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    callq _foo
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    retq
+;
+; X86-LABEL: ktest_4:
+; X86:       ## %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; X86-NEXT:    vptestnmq %zmm1, %zmm1, %k1
+; X86-NEXT:    korb %k1, %k0, %k0
+; X86-NEXT:    vptestnmq %zmm2, %zmm2, %k1
+; X86-NEXT:    vptestnmq %zmm3, %zmm3, %k2
+; X86-NEXT:    korb %k2, %k1, %k1
+; X86-NEXT:    kandb %k1, %k0, %k0
+; X86-NEXT:    kortestb %k0, %k0
+; X86-NEXT:    je LBB72_1
+; X86-NEXT:  ## %bb.2: ## %exit
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+; X86-NEXT:  LBB72_1: ## %bar
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    calll _foo
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+  %a = icmp eq <8 x i64> %w, zeroinitializer
+  %b = icmp eq <8 x i64> %x, zeroinitializer
+  %c = icmp eq <8 x i64> %y, zeroinitializer
+  %d = icmp eq <8 x i64> %z, zeroinitializer
+  %e = or <8 x i1> %a, %b
+  %f = or <8 x i1> %c, %d
+  %g = and <8 x i1> %e, %f
+  %h = bitcast <8 x i1> %g to i8
+  %i = icmp eq i8 %h, 0
+  br i1 %i, label %bar, label %exit
+
+bar:
+  call void @foo()
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
+; CHECK-LABEL: ktest_5:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
+; CHECK-NEXT:    vptestnmd %zmm3, %zmm3, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kandw %k1, %k0, %k0
+; CHECK-NEXT:    kortestw %k0, %k0
+; CHECK-NEXT:    je LBB73_1
+; CHECK-NEXT:  ## %bb.2: ## %exit
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  LBB73_1: ## %bar
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    callq _foo
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+;
+; X86-LABEL: ktest_5:
+; X86:       ## %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; X86-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; X86-NEXT:    korw %k1, %k0, %k0
+; X86-NEXT:    vptestnmd %zmm2, %zmm2, %k1
+; X86-NEXT:    vptestnmd %zmm3, %zmm3, %k2
+; X86-NEXT:    korw %k2, %k1, %k1
+; X86-NEXT:    kandw %k1, %k0, %k0
+; X86-NEXT:    kortestw %k0, %k0
+; X86-NEXT:    je LBB73_1
+; X86-NEXT:  ## %bb.2: ## %exit
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+; X86-NEXT:  LBB73_1: ## %bar
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    calll _foo
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+  %a = icmp eq <16 x i32> %w, zeroinitializer
+  %b = icmp eq <16 x i32> %x, zeroinitializer
+  %c = icmp eq <16 x i32> %y, zeroinitializer
+  %d = icmp eq <16 x i32> %z, zeroinitializer
+  %e = or <16 x i1> %a, %b
+  %f = or <16 x i1> %c, %d
+  %g = and <16 x i1> %e, %f
+  %h = bitcast <16 x i1> %g to i16
+  %i = icmp eq i16 %h, 0
+  br i1 %i, label %bar, label %exit
+
+bar:
+  call void @foo()
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
+; KNL-LABEL: ktest_6:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:    vpxor %xmm8, %xmm8, %xmm8
+; KNL-NEXT:    vpcmpeqw %ymm8, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpeqw %ymm8, %ymm1, %ymm1
+; KNL-NEXT:    vpcmpeqw %ymm8, %ymm2, %ymm2
+; KNL-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpeqw %ymm8, %ymm3, %ymm2
+; KNL-NEXT:    vpor %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    vpcmpeqw %ymm8, %ymm4, %ymm2
+; KNL-NEXT:    vpcmpeqw %ymm8, %ymm5, %ymm3
+; KNL-NEXT:    vpcmpeqw %ymm8, %ymm6, %ymm4
+; KNL-NEXT:    vpor %ymm4, %ymm2, %ymm2
+; KNL-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpeqw %ymm8, %ymm7, %ymm2
+; KNL-NEXT:    vpor %ymm2, %ymm3, %ymm2
+; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    vpmovsxwd %ymm1, %zmm0
+; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT:    kmovw %k0, %ecx
+; KNL-NEXT:    shll $16, %ecx
+; KNL-NEXT:    orl %eax, %ecx
+; KNL-NEXT:    je LBB74_1
+; KNL-NEXT:  ## %bb.2: ## %exit
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+; KNL-NEXT:  LBB74_1: ## %bar
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    callq _foo
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: ktest_6:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k0
+; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1
+; SKX-NEXT:    kord %k1, %k0, %k0
+; SKX-NEXT:    vptestnmw %zmm2, %zmm2, %k1
+; SKX-NEXT:    vptestnmw %zmm3, %zmm3, %k2
+; SKX-NEXT:    kord %k2, %k1, %k1
+; SKX-NEXT:    kandd %k1, %k0, %k0
+; SKX-NEXT:    kortestd %k0, %k0
+; SKX-NEXT:    je LBB74_1
+; SKX-NEXT:  ## %bb.2: ## %exit
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    retq
+; SKX-NEXT:  LBB74_1: ## %bar
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    callq _foo
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: ktest_6:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
+; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmw %zmm1, %zmm1, %k1
+; AVX512BW-NEXT:    kord %k1, %k0, %k0
+; AVX512BW-NEXT:    vptestnmw %zmm2, %zmm2, %k1
+; AVX512BW-NEXT:    vptestnmw %zmm3, %zmm3, %k2
+; AVX512BW-NEXT:    kord %k2, %k1, %k1
+; AVX512BW-NEXT:    kandd %k1, %k0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    je LBB74_1
+; AVX512BW-NEXT:  ## %bb.2: ## %exit
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+; AVX512BW-NEXT:  LBB74_1: ## %bar
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    callq _foo
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: ktest_6:
+; AVX512DQ:       ## %bb.0:
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT:    vpxor %xmm8, %xmm8, %xmm8
+; AVX512DQ-NEXT:    vpcmpeqw %ymm8, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpcmpeqw %ymm8, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpcmpeqw %ymm8, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpcmpeqw %ymm8, %ymm3, %ymm2
+; AVX512DQ-NEXT:    vpor %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpcmpeqw %ymm8, %ymm4, %ymm2
+; AVX512DQ-NEXT:    vpcmpeqw %ymm8, %ymm5, %ymm3
+; AVX512DQ-NEXT:    vpcmpeqw %ymm8, %ymm6, %ymm4
+; AVX512DQ-NEXT:    vpor %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpcmpeqw %ymm8, %ymm7, %ymm2
+; AVX512DQ-NEXT:    vpor %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT:    kmovw %k0, %eax
+; AVX512DQ-NEXT:    vpmovsxwd %ymm1, %zmm0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT:    kmovw %k0, %ecx
+; AVX512DQ-NEXT:    shll $16, %ecx
+; AVX512DQ-NEXT:    orl %eax, %ecx
+; AVX512DQ-NEXT:    je LBB74_1
+; AVX512DQ-NEXT:  ## %bb.2: ## %exit
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+; AVX512DQ-NEXT:  LBB74_1: ## %bar
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    callq _foo
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    retq
+;
+; X86-LABEL: ktest_6:
+; X86:       ## %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    vptestnmw %zmm0, %zmm0, %k0
+; X86-NEXT:    vptestnmw %zmm1, %zmm1, %k1
+; X86-NEXT:    kord %k1, %k0, %k0
+; X86-NEXT:    vptestnmw %zmm2, %zmm2, %k1
+; X86-NEXT:    vptestnmw %zmm3, %zmm3, %k2
+; X86-NEXT:    kord %k2, %k1, %k1
+; X86-NEXT:    kandd %k1, %k0, %k0
+; X86-NEXT:    kortestd %k0, %k0
+; X86-NEXT:    je LBB74_1
+; X86-NEXT:  ## %bb.2: ## %exit
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+; X86-NEXT:  LBB74_1: ## %bar
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    calll _foo
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+  %a = icmp eq <32 x i16> %w, zeroinitializer
+  %b = icmp eq <32 x i16> %x, zeroinitializer
+  %c = icmp eq <32 x i16> %y, zeroinitializer
+  %d = icmp eq <32 x i16> %z, zeroinitializer
+  %e = or <32 x i1> %a, %b
+  %f = or <32 x i1> %c, %d
+  %g = and <32 x i1> %e, %f
+  %h = bitcast <32 x i1> %g to i32
+  %i = icmp eq i32 %h, 0
+  br i1 %i, label %bar, label %exit
+
+bar:
+  call void @foo()
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
+; KNL-LABEL: ktest_7:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:    vpxor %xmm8, %xmm8, %xmm8
+; KNL-NEXT:    vpcmpeqb %ymm8, %ymm0, %ymm9
+; KNL-NEXT:    vextracti128 $1, %ymm9, %xmm0
+; KNL-NEXT:    vpcmpeqb %ymm8, %ymm1, %ymm10
+; KNL-NEXT:    vextracti128 $1, %ymm10, %xmm1
+; KNL-NEXT:    vpcmpeqb %ymm8, %ymm2, %ymm11
+; KNL-NEXT:    vextracti128 $1, %ymm11, %xmm2
+; KNL-NEXT:    vpor %xmm2, %xmm0, %xmm13
+; KNL-NEXT:    vpcmpeqb %ymm8, %ymm3, %ymm2
+; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm3
+; KNL-NEXT:    vpor %xmm3, %xmm1, %xmm12
+; KNL-NEXT:    vpcmpeqb %ymm8, %ymm4, %ymm3
+; KNL-NEXT:    vextracti128 $1, %ymm3, %xmm4
+; KNL-NEXT:    vpcmpeqb %ymm8, %ymm5, %ymm5
+; KNL-NEXT:    vextracti128 $1, %ymm5, %xmm1
+; KNL-NEXT:    vpcmpeqb %ymm8, %ymm6, %ymm6
+; KNL-NEXT:    vextracti128 $1, %ymm6, %xmm0
+; KNL-NEXT:    vpor %xmm0, %xmm4, %xmm0
+; KNL-NEXT:    vpand %xmm0, %xmm13, %xmm0
+; KNL-NEXT:    vpcmpeqb %ymm8, %ymm7, %ymm4
+; KNL-NEXT:    vextracti128 $1, %ymm4, %xmm7
+; KNL-NEXT:    vpor %xmm7, %xmm1, %xmm1
+; KNL-NEXT:    vpand %xmm1, %xmm12, %xmm1
+; KNL-NEXT:    vpor %xmm2, %xmm10, %xmm2
+; KNL-NEXT:    vpor %xmm11, %xmm9, %xmm7
+; KNL-NEXT:    vpor %xmm4, %xmm5, %xmm4
+; KNL-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; KNL-NEXT:    vpor %xmm6, %xmm3, %xmm3
+; KNL-NEXT:    vpand %xmm3, %xmm7, %xmm3
+; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
+; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT:    kmovw %k0, %ecx
+; KNL-NEXT:    shll $16, %ecx
+; KNL-NEXT:    orl %eax, %ecx
+; KNL-NEXT:    vpmovsxbd %xmm2, %zmm0
+; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
+; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT:    kmovw %k0, %edx
+; KNL-NEXT:    shll $16, %edx
+; KNL-NEXT:    orl %eax, %edx
+; KNL-NEXT:    shlq $32, %rdx
+; KNL-NEXT:    orq %rcx, %rdx
+; KNL-NEXT:    je LBB75_1
+; KNL-NEXT:  ## %bb.2: ## %exit
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+; KNL-NEXT:  LBB75_1: ## %bar
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    callq _foo
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: ktest_7:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; SKX-NEXT:    vptestnmb %zmm1, %zmm1, %k1
+; SKX-NEXT:    korq %k1, %k0, %k0
+; SKX-NEXT:    vptestnmb %zmm2, %zmm2, %k1
+; SKX-NEXT:    vptestnmb %zmm3, %zmm3, %k2
+; SKX-NEXT:    korq %k2, %k1, %k1
+; SKX-NEXT:    kandq %k1, %k0, %k0
+; SKX-NEXT:    kortestq %k0, %k0
+; SKX-NEXT:    je LBB75_1
+; SKX-NEXT:  ## %bb.2: ## %exit
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    retq
+; SKX-NEXT:  LBB75_1: ## %bar
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    callq _foo
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: ktest_7:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
+; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm1, %k1
+; AVX512BW-NEXT:    korq %k1, %k0, %k0
+; AVX512BW-NEXT:    vptestnmb %zmm2, %zmm2, %k1
+; AVX512BW-NEXT:    vptestnmb %zmm3, %zmm3, %k2
+; AVX512BW-NEXT:    korq %k2, %k1, %k1
+; AVX512BW-NEXT:    kandq %k1, %k0, %k0
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    je LBB75_1
+; AVX512BW-NEXT:  ## %bb.2: ## %exit
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+; AVX512BW-NEXT:  LBB75_1: ## %bar
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    callq _foo
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: ktest_7:
+; AVX512DQ:       ## %bb.0:
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT:    vpxor %xmm8, %xmm8, %xmm8
+; AVX512DQ-NEXT:    vpcmpeqb %ymm8, %ymm0, %ymm9
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm9, %xmm0
+; AVX512DQ-NEXT:    vpcmpeqb %ymm8, %ymm1, %ymm10
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm10, %xmm1
+; AVX512DQ-NEXT:    vpcmpeqb %ymm8, %ymm2, %ymm11
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm11, %xmm2
+; AVX512DQ-NEXT:    vpor %xmm2, %xmm0, %xmm13
+; AVX512DQ-NEXT:    vpcmpeqb %ymm8, %ymm3, %ymm2
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm3
+; AVX512DQ-NEXT:    vpor %xmm3, %xmm1, %xmm12
+; AVX512DQ-NEXT:    vpcmpeqb %ymm8, %ymm4, %ymm3
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm3, %xmm4
+; AVX512DQ-NEXT:    vpcmpeqb %ymm8, %ymm5, %ymm5
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm5, %xmm1
+; AVX512DQ-NEXT:    vpcmpeqb %ymm8, %ymm6, %ymm6
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm6, %xmm0
+; AVX512DQ-NEXT:    vpor %xmm0, %xmm4, %xmm0
+; AVX512DQ-NEXT:    vpand %xmm0, %xmm13, %xmm0
+; AVX512DQ-NEXT:    vpcmpeqb %ymm8, %ymm7, %ymm4
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm4, %xmm7
+; AVX512DQ-NEXT:    vpor %xmm7, %xmm1, %xmm1
+; AVX512DQ-NEXT:    vpand %xmm1, %xmm12, %xmm1
+; AVX512DQ-NEXT:    vpor %xmm2, %xmm10, %xmm2
+; AVX512DQ-NEXT:    vpor %xmm11, %xmm9, %xmm7
+; AVX512DQ-NEXT:    vpor %xmm4, %xmm5, %xmm4
+; AVX512DQ-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vpor %xmm6, %xmm3, %xmm3
+; AVX512DQ-NEXT:    vpand %xmm3, %xmm7, %xmm3
+; AVX512DQ-NEXT:    vpmovsxbd %xmm3, %zmm3
+; AVX512DQ-NEXT:    vpmovd2m %zmm3, %k0
+; AVX512DQ-NEXT:    kmovw %k0, %eax
+; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT:    kmovw %k0, %ecx
+; AVX512DQ-NEXT:    shll $16, %ecx
+; AVX512DQ-NEXT:    orl %eax, %ecx
+; AVX512DQ-NEXT:    vpmovsxbd %xmm2, %zmm0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT:    kmovw %k0, %eax
+; AVX512DQ-NEXT:    vpmovsxbd %xmm1, %zmm0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT:    kmovw %k0, %edx
+; AVX512DQ-NEXT:    shll $16, %edx
+; AVX512DQ-NEXT:    orl %eax, %edx
+; AVX512DQ-NEXT:    shlq $32, %rdx
+; AVX512DQ-NEXT:    orq %rcx, %rdx
+; AVX512DQ-NEXT:    je LBB75_1
+; AVX512DQ-NEXT:  ## %bb.2: ## %exit
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+; AVX512DQ-NEXT:  LBB75_1: ## %bar
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    callq _foo
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    retq
+;
+; X86-LABEL: ktest_7:
+; X86:       ## %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; X86-NEXT:    vptestnmb %zmm1, %zmm1, %k1
+; X86-NEXT:    korq %k1, %k0, %k0
+; X86-NEXT:    vptestnmb %zmm2, %zmm2, %k1
+; X86-NEXT:    vptestnmb %zmm3, %zmm3, %k2
+; X86-NEXT:    korq %k2, %k1, %k1
+; X86-NEXT:    kandq %k1, %k0, %k0
+; X86-NEXT:    kshiftrq $32, %k0, %k1
+; X86-NEXT:    kortestd %k1, %k0
+; X86-NEXT:    je LBB75_1
+; X86-NEXT:  ## %bb.2: ## %exit
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+; X86-NEXT:  LBB75_1: ## %bar
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    calll _foo
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+  %a = icmp eq <64 x i8> %w, zeroinitializer
+  %b = icmp eq <64 x i8> %x, zeroinitializer
+  %c = icmp eq <64 x i8> %y, zeroinitializer
+  %d = icmp eq <64 x i8> %z, zeroinitializer
+  %e = or <64 x i1> %a, %b
+  %f = or <64 x i1> %c, %d
+  %g = and <64 x i1> %e, %f
+  %h = bitcast <64 x i1> %g to i64
+  %i = icmp eq i64 %h, 0
+  br i1 %i, label %bar, label %exit
+
+bar:
+  call void @foo()
+  br label %exit
+
+exit:
+  ret void
+}




More information about the llvm-commits mailing list