[llvm] r350287 - [X86] Add test cases for opportunities to use KTEST when checking if the result of ANDing two mask registers is zero.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 2 23:12:54 PST 2019
Author: ctopper
Date: Wed Jan 2 23:12:54 2019
New Revision: 350287
URL: http://llvm.org/viewvc/llvm-project?rev=350287&view=rev
Log:
[X86] Add test cases for opportunities to use KTEST when checking if the result of ANDing two mask registers is zero.
The test cases are constructed to avoid folding the AND into a masked compare operation.
Currently we emit a KAND and a KORTEST for these cases.
Modified:
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=350287&r1=350286&r2=350287&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Jan 2 23:12:54 2019
@@ -3479,3 +3479,729 @@ define void @mask_not_cast(i8*, <8 x i64
ret void
}
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
+
+define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
+; KNL-LABEL: ktest_3:
+; KNL: ## %bb.0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
+; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
+; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
+; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
+; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
+; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2
+; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3
+; KNL-NEXT: korw %k1, %k0, %k0
+; KNL-NEXT: korw %k3, %k2, %k1
+; KNL-NEXT: kandw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: testb %al, %al
+; KNL-NEXT: je LBB71_1
+; KNL-NEXT: ## %bb.2: ## %exit
+; KNL-NEXT: popq %rax
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+; KNL-NEXT: LBB71_1: ## %bar
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: callq _foo
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: ktest_3:
+; SKX: ## %bb.0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; SKX-NEXT: korb %k1, %k0, %k0
+; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; SKX-NEXT: korb %k2, %k1, %k1
+; SKX-NEXT: kandb %k1, %k0, %k0
+; SKX-NEXT: kortestb %k0, %k0
+; SKX-NEXT: je LBB71_1
+; SKX-NEXT: ## %bb.2: ## %exit
+; SKX-NEXT: popq %rax
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+; SKX-NEXT: LBB71_1: ## %bar
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: callq _foo
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: ktest_3:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: pushq %rax
+; AVX512BW-NEXT: .cfi_def_cfa_offset 16
+; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
+; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2
+; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3
+; AVX512BW-NEXT: korw %k1, %k0, %k0
+; AVX512BW-NEXT: korw %k3, %k2, %k1
+; AVX512BW-NEXT: kandw %k1, %k0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: testb %al, %al
+; AVX512BW-NEXT: je LBB71_1
+; AVX512BW-NEXT: ## %bb.2: ## %exit
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+; AVX512BW-NEXT: LBB71_1: ## %bar
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: callq _foo
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: ktest_3:
+; AVX512DQ: ## %bb.0:
+; AVX512DQ-NEXT: pushq %rax
+; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
+; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
+; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2
+; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3
+; AVX512DQ-NEXT: korb %k1, %k0, %k0
+; AVX512DQ-NEXT: korb %k3, %k2, %k1
+; AVX512DQ-NEXT: kandb %k1, %k0, %k0
+; AVX512DQ-NEXT: kortestb %k0, %k0
+; AVX512DQ-NEXT: je LBB71_1
+; AVX512DQ-NEXT: ## %bb.2: ## %exit
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+; AVX512DQ-NEXT: LBB71_1: ## %bar
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: callq _foo
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: retq
+;
+; X86-LABEL: ktest_3:
+; X86: ## %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; X86-NEXT: korb %k1, %k0, %k0
+; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; X86-NEXT: korb %k2, %k1, %k1
+; X86-NEXT: kandb %k1, %k0, %k0
+; X86-NEXT: kortestb %k0, %k0
+; X86-NEXT: je LBB71_1
+; X86-NEXT: ## %bb.2: ## %exit
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+; X86-NEXT: LBB71_1: ## %bar
+; X86-NEXT: vzeroupper
+; X86-NEXT: calll _foo
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+ %a = icmp eq <8 x i32> %w, zeroinitializer
+ %b = icmp eq <8 x i32> %x, zeroinitializer
+ %c = icmp eq <8 x i32> %y, zeroinitializer
+ %d = icmp eq <8 x i32> %z, zeroinitializer
+ %e = or <8 x i1> %a, %b
+ %f = or <8 x i1> %c, %d
+ %g = and <8 x i1> %e, %f
+ %h = bitcast <8 x i1> %g to i8
+ %i = icmp eq i8 %h, 0
+ br i1 %i, label %bar, label %exit
+
+bar:
+ call void @foo()
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
+; KNL-LABEL: ktest_4:
+; KNL: ## %bb.0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
+; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2
+; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3
+; KNL-NEXT: korw %k1, %k0, %k0
+; KNL-NEXT: korw %k3, %k2, %k1
+; KNL-NEXT: kandw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: testb %al, %al
+; KNL-NEXT: je LBB72_1
+; KNL-NEXT: ## %bb.2: ## %exit
+; KNL-NEXT: popq %rax
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+; KNL-NEXT: LBB72_1: ## %bar
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: callq _foo
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: ktest_4:
+; SKX: ## %bb.0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1
+; SKX-NEXT: korb %k1, %k0, %k0
+; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1
+; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
+; SKX-NEXT: korb %k2, %k1, %k1
+; SKX-NEXT: kandb %k1, %k0, %k0
+; SKX-NEXT: kortestb %k0, %k0
+; SKX-NEXT: je LBB72_1
+; SKX-NEXT: ## %bb.2: ## %exit
+; SKX-NEXT: popq %rax
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+; SKX-NEXT: LBB72_1: ## %bar
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: callq _foo
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: ktest_4:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: pushq %rax
+; AVX512BW-NEXT: .cfi_def_cfa_offset 16
+; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2
+; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3
+; AVX512BW-NEXT: korw %k1, %k0, %k0
+; AVX512BW-NEXT: korw %k3, %k2, %k1
+; AVX512BW-NEXT: kandw %k1, %k0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: testb %al, %al
+; AVX512BW-NEXT: je LBB72_1
+; AVX512BW-NEXT: ## %bb.2: ## %exit
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+; AVX512BW-NEXT: LBB72_1: ## %bar
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: callq _foo
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: ktest_4:
+; AVX512DQ: ## %bb.0:
+; AVX512DQ-NEXT: pushq %rax
+; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1
+; AVX512DQ-NEXT: korb %k1, %k0, %k0
+; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1
+; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
+; AVX512DQ-NEXT: korb %k2, %k1, %k1
+; AVX512DQ-NEXT: kandb %k1, %k0, %k0
+; AVX512DQ-NEXT: kortestb %k0, %k0
+; AVX512DQ-NEXT: je LBB72_1
+; AVX512DQ-NEXT: ## %bb.2: ## %exit
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+; AVX512DQ-NEXT: LBB72_1: ## %bar
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: callq _foo
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: retq
+;
+; X86-LABEL: ktest_4:
+; X86: ## %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1
+; X86-NEXT: korb %k1, %k0, %k0
+; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1
+; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
+; X86-NEXT: korb %k2, %k1, %k1
+; X86-NEXT: kandb %k1, %k0, %k0
+; X86-NEXT: kortestb %k0, %k0
+; X86-NEXT: je LBB72_1
+; X86-NEXT: ## %bb.2: ## %exit
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+; X86-NEXT: LBB72_1: ## %bar
+; X86-NEXT: vzeroupper
+; X86-NEXT: calll _foo
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+ %a = icmp eq <8 x i64> %w, zeroinitializer
+ %b = icmp eq <8 x i64> %x, zeroinitializer
+ %c = icmp eq <8 x i64> %y, zeroinitializer
+ %d = icmp eq <8 x i64> %z, zeroinitializer
+ %e = or <8 x i1> %a, %b
+ %f = or <8 x i1> %c, %d
+ %g = and <8 x i1> %e, %f
+ %h = bitcast <8 x i1> %g to i8
+ %i = icmp eq i8 %h, 0
+ br i1 %i, label %bar, label %exit
+
+bar:
+ call void @foo()
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
+; CHECK-LABEL: ktest_5:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
+; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k2
+; CHECK-NEXT: korw %k2, %k1, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: kortestw %k0, %k0
+; CHECK-NEXT: je LBB73_1
+; CHECK-NEXT: ## %bb.2: ## %exit
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+; CHECK-NEXT: LBB73_1: ## %bar
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq _foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+;
+; X86-LABEL: ktest_5:
+; X86: ## %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1
+; X86-NEXT: korw %k1, %k0, %k0
+; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1
+; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
+; X86-NEXT: korw %k2, %k1, %k1
+; X86-NEXT: kandw %k1, %k0, %k0
+; X86-NEXT: kortestw %k0, %k0
+; X86-NEXT: je LBB73_1
+; X86-NEXT: ## %bb.2: ## %exit
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+; X86-NEXT: LBB73_1: ## %bar
+; X86-NEXT: vzeroupper
+; X86-NEXT: calll _foo
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+ %a = icmp eq <16 x i32> %w, zeroinitializer
+ %b = icmp eq <16 x i32> %x, zeroinitializer
+ %c = icmp eq <16 x i32> %y, zeroinitializer
+ %d = icmp eq <16 x i32> %z, zeroinitializer
+ %e = or <16 x i1> %a, %b
+ %f = or <16 x i1> %c, %d
+ %g = and <16 x i1> %e, %f
+ %h = bitcast <16 x i1> %g to i16
+ %i = icmp eq i16 %h, 0
+ br i1 %i, label %bar, label %exit
+
+bar:
+ call void @foo()
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
+; KNL-LABEL: ktest_6:
+; KNL: ## %bb.0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
+; KNL-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
+; KNL-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
+; KNL-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
+; KNL-NEXT: vpor %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2
+; KNL-NEXT: vpor %ymm2, %ymm1, %ymm1
+; KNL-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
+; KNL-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3
+; KNL-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4
+; KNL-NEXT: vpor %ymm4, %ymm2, %ymm2
+; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2
+; KNL-NEXT: vpor %ymm2, %ymm3, %ymm2
+; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: shll $16, %ecx
+; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: je LBB74_1
+; KNL-NEXT: ## %bb.2: ## %exit
+; KNL-NEXT: popq %rax
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+; KNL-NEXT: LBB74_1: ## %bar
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: callq _foo
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: ktest_6:
+; SKX: ## %bb.0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
+; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1
+; SKX-NEXT: kord %k1, %k0, %k0
+; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1
+; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
+; SKX-NEXT: kord %k2, %k1, %k1
+; SKX-NEXT: kandd %k1, %k0, %k0
+; SKX-NEXT: kortestd %k0, %k0
+; SKX-NEXT: je LBB74_1
+; SKX-NEXT: ## %bb.2: ## %exit
+; SKX-NEXT: popq %rax
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+; SKX-NEXT: LBB74_1: ## %bar
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: callq _foo
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: ktest_6:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: pushq %rax
+; AVX512BW-NEXT: .cfi_def_cfa_offset 16
+; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: kord %k1, %k0, %k0
+; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
+; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
+; AVX512BW-NEXT: kord %k2, %k1, %k1
+; AVX512BW-NEXT: kandd %k1, %k0, %k0
+; AVX512BW-NEXT: kortestd %k0, %k0
+; AVX512BW-NEXT: je LBB74_1
+; AVX512BW-NEXT: ## %bb.2: ## %exit
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+; AVX512BW-NEXT: LBB74_1: ## %bar
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: callq _foo
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: ktest_6:
+; AVX512DQ: ## %bb.0:
+; AVX512DQ-NEXT: pushq %rax
+; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
+; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
+; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3
+; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4
+; AVX512DQ-NEXT: vpor %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2
+; AVX512DQ-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %ecx
+; AVX512DQ-NEXT: shll $16, %ecx
+; AVX512DQ-NEXT: orl %eax, %ecx
+; AVX512DQ-NEXT: je LBB74_1
+; AVX512DQ-NEXT: ## %bb.2: ## %exit
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+; AVX512DQ-NEXT: LBB74_1: ## %bar
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: callq _foo
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: retq
+;
+; X86-LABEL: ktest_6:
+; X86: ## %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
+; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1
+; X86-NEXT: kord %k1, %k0, %k0
+; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1
+; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
+; X86-NEXT: kord %k2, %k1, %k1
+; X86-NEXT: kandd %k1, %k0, %k0
+; X86-NEXT: kortestd %k0, %k0
+; X86-NEXT: je LBB74_1
+; X86-NEXT: ## %bb.2: ## %exit
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+; X86-NEXT: LBB74_1: ## %bar
+; X86-NEXT: vzeroupper
+; X86-NEXT: calll _foo
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+ %a = icmp eq <32 x i16> %w, zeroinitializer
+ %b = icmp eq <32 x i16> %x, zeroinitializer
+ %c = icmp eq <32 x i16> %y, zeroinitializer
+ %d = icmp eq <32 x i16> %z, zeroinitializer
+ %e = or <32 x i1> %a, %b
+ %f = or <32 x i1> %c, %d
+ %g = and <32 x i1> %e, %f
+ %h = bitcast <32 x i1> %g to i32
+ %i = icmp eq i32 %h, 0
+ br i1 %i, label %bar, label %exit
+
+bar:
+ call void @foo()
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
+; KNL-LABEL: ktest_7:
+; KNL: ## %bb.0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
+; KNL-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
+; KNL-NEXT: vextracti128 $1, %ymm9, %xmm0
+; KNL-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10
+; KNL-NEXT: vextracti128 $1, %ymm10, %xmm1
+; KNL-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11
+; KNL-NEXT: vextracti128 $1, %ymm11, %xmm2
+; KNL-NEXT: vpor %xmm2, %xmm0, %xmm13
+; KNL-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2
+; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
+; KNL-NEXT: vpor %xmm3, %xmm1, %xmm12
+; KNL-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3
+; KNL-NEXT: vextracti128 $1, %ymm3, %xmm4
+; KNL-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5
+; KNL-NEXT: vextracti128 $1, %ymm5, %xmm1
+; KNL-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6
+; KNL-NEXT: vextracti128 $1, %ymm6, %xmm0
+; KNL-NEXT: vpor %xmm0, %xmm4, %xmm0
+; KNL-NEXT: vpand %xmm0, %xmm13, %xmm0
+; KNL-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4
+; KNL-NEXT: vextracti128 $1, %ymm4, %xmm7
+; KNL-NEXT: vpor %xmm7, %xmm1, %xmm1
+; KNL-NEXT: vpand %xmm1, %xmm12, %xmm1
+; KNL-NEXT: vpor %xmm2, %xmm10, %xmm2
+; KNL-NEXT: vpor %xmm11, %xmm9, %xmm7
+; KNL-NEXT: vpor %xmm4, %xmm5, %xmm4
+; KNL-NEXT: vpand %xmm4, %xmm2, %xmm2
+; KNL-NEXT: vpor %xmm6, %xmm3, %xmm3
+; KNL-NEXT: vpand %xmm3, %xmm7, %xmm3
+; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
+; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: shll $16, %ecx
+; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: shll $16, %edx
+; KNL-NEXT: orl %eax, %edx
+; KNL-NEXT: shlq $32, %rdx
+; KNL-NEXT: orq %rcx, %rdx
+; KNL-NEXT: je LBB75_1
+; KNL-NEXT: ## %bb.2: ## %exit
+; KNL-NEXT: popq %rax
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+; KNL-NEXT: LBB75_1: ## %bar
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: callq _foo
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: ktest_7:
+; SKX: ## %bb.0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
+; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1
+; SKX-NEXT: korq %k1, %k0, %k0
+; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1
+; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
+; SKX-NEXT: korq %k2, %k1, %k1
+; SKX-NEXT: kandq %k1, %k0, %k0
+; SKX-NEXT: kortestq %k0, %k0
+; SKX-NEXT: je LBB75_1
+; SKX-NEXT: ## %bb.2: ## %exit
+; SKX-NEXT: popq %rax
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+; SKX-NEXT: LBB75_1: ## %bar
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: callq _foo
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: ktest_7:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: pushq %rax
+; AVX512BW-NEXT: .cfi_def_cfa_offset 16
+; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: korq %k1, %k0, %k0
+; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1
+; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
+; AVX512BW-NEXT: korq %k2, %k1, %k1
+; AVX512BW-NEXT: kandq %k1, %k0, %k0
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: je LBB75_1
+; AVX512BW-NEXT: ## %bb.2: ## %exit
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+; AVX512BW-NEXT: LBB75_1: ## %bar
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: callq _foo
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: ktest_7:
+; AVX512DQ: ## %bb.0:
+; AVX512DQ-NEXT: pushq %rax
+; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
+; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
+; AVX512DQ-NEXT: vextracti128 $1, %ymm9, %xmm0
+; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10
+; AVX512DQ-NEXT: vextracti128 $1, %ymm10, %xmm1
+; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11
+; AVX512DQ-NEXT: vextracti128 $1, %ymm11, %xmm2
+; AVX512DQ-NEXT: vpor %xmm2, %xmm0, %xmm13
+; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2
+; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX512DQ-NEXT: vpor %xmm3, %xmm1, %xmm12
+; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3
+; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5
+; AVX512DQ-NEXT: vextracti128 $1, %ymm5, %xmm1
+; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6
+; AVX512DQ-NEXT: vextracti128 $1, %ymm6, %xmm0
+; AVX512DQ-NEXT: vpor %xmm0, %xmm4, %xmm0
+; AVX512DQ-NEXT: vpand %xmm0, %xmm13, %xmm0
+; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4
+; AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm7
+; AVX512DQ-NEXT: vpor %xmm7, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpand %xmm1, %xmm12, %xmm1
+; AVX512DQ-NEXT: vpor %xmm2, %xmm10, %xmm2
+; AVX512DQ-NEXT: vpor %xmm11, %xmm9, %xmm7
+; AVX512DQ-NEXT: vpor %xmm4, %xmm5, %xmm4
+; AVX512DQ-NEXT: vpand %xmm4, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpor %xmm6, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpand %xmm3, %xmm7, %xmm3
+; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
+; AVX512DQ-NEXT: vpmovd2m %zmm3, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %ecx
+; AVX512DQ-NEXT: shll $16, %ecx
+; AVX512DQ-NEXT: orl %eax, %ecx
+; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %edx
+; AVX512DQ-NEXT: shll $16, %edx
+; AVX512DQ-NEXT: orl %eax, %edx
+; AVX512DQ-NEXT: shlq $32, %rdx
+; AVX512DQ-NEXT: orq %rcx, %rdx
+; AVX512DQ-NEXT: je LBB75_1
+; AVX512DQ-NEXT: ## %bb.2: ## %exit
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+; AVX512DQ-NEXT: LBB75_1: ## %bar
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: callq _foo
+; AVX512DQ-NEXT: popq %rax
+; AVX512DQ-NEXT: retq
+;
+; X86-LABEL: ktest_7:
+; X86: ## %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
+; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1
+; X86-NEXT: korq %k1, %k0, %k0
+; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1
+; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2
+; X86-NEXT: korq %k2, %k1, %k1
+; X86-NEXT: kandq %k1, %k0, %k0
+; X86-NEXT: kshiftrq $32, %k0, %k1
+; X86-NEXT: kortestd %k1, %k0
+; X86-NEXT: je LBB75_1
+; X86-NEXT: ## %bb.2: ## %exit
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+; X86-NEXT: LBB75_1: ## %bar
+; X86-NEXT: vzeroupper
+; X86-NEXT: calll _foo
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+ %a = icmp eq <64 x i8> %w, zeroinitializer
+ %b = icmp eq <64 x i8> %x, zeroinitializer
+ %c = icmp eq <64 x i8> %y, zeroinitializer
+ %d = icmp eq <64 x i8> %z, zeroinitializer
+ %e = or <64 x i1> %a, %b
+ %f = or <64 x i1> %c, %d
+ %g = and <64 x i1> %e, %f
+ %h = bitcast <64 x i1> %g to i64
+ %i = icmp eq i64 %h, 0
+ br i1 %i, label %bar, label %exit
+
+bar:
+ call void @foo()
+ br label %exit
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list