[llvm] c972e1c - [X86] v8i1-masks.ll - add avx512 test coverage and use X86 check prefix instead of X32
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 21 06:14:40 PST 2023
Author: Simon Pilgrim
Date: 2023-01-21T14:14:31Z
New Revision: c972e1c8b59b144599b47e8a7946ff8e531a2049
URL: https://github.com/llvm/llvm-project/commit/c972e1c8b59b144599b47e8a7946ff8e531a2049
DIFF: https://github.com/llvm/llvm-project/commit/c972e1c8b59b144599b47e8a7946ff8e531a2049.diff
LOG: [X86] v8i1-masks.ll - add avx512 test coverage and use X86 check prefix instead of X32
We try to only use the X32 check prefix for tests targeting gnux32 triples, so this test (i686/x86_64 darwin) should use X86 instead
Added:
Modified:
llvm/test/CodeGen/X86/v8i1-masks.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/v8i1-masks.ll b/llvm/test/CodeGen/X86/v8i1-masks.ll
index c2dd3539e71e..212d9764622d 100644
--- a/llvm/test/CodeGen/X86/v8i1-masks.ll
+++ b/llvm/test/CodeGen/X86/v8i1-masks.ll
@@ -1,25 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX2
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=X86-AVX512
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=X64-AVX512
define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
-; X32-LABEL: and_masks:
-; X32: ## %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: vmovups (%edx), %ymm0
-; X32-NEXT: vmovups (%ecx), %ymm1
-; X32-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vmovups (%eax), %ymm2
-; X32-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vandps %ymm1, %ymm0, %ymm0
-; X32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vmovaps %ymm0, (%eax)
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
+; X86-LABEL: and_masks:
+; X86: ## %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: vmovups (%edx), %ymm0
+; X86-NEXT: vmovups (%ecx), %ymm1
+; X86-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vmovups (%eax), %ymm2
+; X86-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vandps %ymm1, %ymm0, %ymm0
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vmovaps %ymm0, (%eax)
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
;
; X64-LABEL: and_masks:
; X64: ## %bb.0:
@@ -34,21 +36,21 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: and_masks:
-; X32-AVX2: ## %bb.0:
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-AVX2-NEXT: vmovups (%edx), %ymm0
-; X32-AVX2-NEXT: vmovups (%ecx), %ymm1
-; X32-AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vmovups (%eax), %ymm2
-; X32-AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
-; X32-AVX2-NEXT: vmovdqa %ymm0, (%eax)
-; X32-AVX2-NEXT: vzeroupper
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: and_masks:
+; X86-AVX2: ## %bb.0:
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-AVX2-NEXT: vmovups (%edx), %ymm0
+; X86-AVX2-NEXT: vmovups (%ecx), %ymm1
+; X86-AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vmovups (%eax), %ymm2
+; X86-AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
+; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax)
+; X86-AVX2-NEXT: vzeroupper
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: and_masks:
; X64-AVX2: ## %bb.0:
@@ -62,6 +64,33 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: and_masks:
+; X86-AVX512: ## %bb.0:
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512-NEXT: vmovups (%edx), %ymm0
+; X86-AVX512-NEXT: vcmpgtps (%ecx), %ymm0, %k1
+; X86-AVX512-NEXT: vcmpgtps (%eax), %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa %ymm0, (%eax)
+; X86-AVX512-NEXT: vzeroupper
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: and_masks:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vmovups (%rdi), %ymm0
+; X64-AVX512-NEXT: vcmpgtps (%rdx), %ymm0, %k1
+; X64-AVX512-NEXT: vcmpgtps (%rsi), %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa %ymm0, (%rax)
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%v0 = load <8 x float>, ptr %a, align 16
%v1 = load <8 x float>, ptr %b, align 16
%m0 = fcmp olt <8 x float> %v1, %v0
@@ -74,16 +103,16 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
}
define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
-; X32-LABEL: neg_masks:
-; X32: ## %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovups (%ecx), %ymm0
-; X32-NEXT: vcmpnltps (%eax), %ymm0, %ymm0
-; X32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vmovaps %ymm0, (%eax)
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
+; X86-LABEL: neg_masks:
+; X86: ## %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovups (%ecx), %ymm0
+; X86-NEXT: vcmpnltps (%eax), %ymm0, %ymm0
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vmovaps %ymm0, (%eax)
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
;
; X64-LABEL: neg_masks:
; X64: ## %bb.0:
@@ -94,17 +123,17 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: neg_masks:
-; X32-AVX2: ## %bb.0:
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX2-NEXT: vmovups (%ecx), %ymm0
-; X32-AVX2-NEXT: vcmpnltps (%eax), %ymm0, %ymm0
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
-; X32-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vmovaps %ymm0, (%eax)
-; X32-AVX2-NEXT: vzeroupper
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: neg_masks:
+; X86-AVX2: ## %bb.0:
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT: vmovups (%ecx), %ymm0
+; X86-AVX2-NEXT: vcmpnltps (%eax), %ymm0, %ymm0
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; X86-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vmovaps %ymm0, (%eax)
+; X86-AVX2-NEXT: vzeroupper
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: neg_masks:
; X64-AVX2: ## %bb.0:
@@ -115,6 +144,30 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
; X64-AVX2-NEXT: vmovaps %ymm0, (%rax)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: neg_masks:
+; X86-AVX512: ## %bb.0:
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT: vmovups (%ecx), %ymm0
+; X86-AVX512-NEXT: vcmpnltps (%eax), %ymm0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa %ymm0, (%eax)
+; X86-AVX512-NEXT: vzeroupper
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: neg_masks:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vmovups (%rsi), %ymm0
+; X64-AVX512-NEXT: vcmpnltps (%rdi), %ymm0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa %ymm0, (%rax)
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%v0 = load <8 x float>, ptr %a, align 16
%v1 = load <8 x float>, ptr %b, align 16
%m0 = fcmp olt <8 x float> %v1, %v0
@@ -125,15 +178,15 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
}
define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
-; X32-LABEL: and_mask_constant:
-; X32: ## %bb.0:
-; X32-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X32-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; X32-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: and_mask_constant:
+; X86: ## %bb.0:
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
+; X86-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: and_mask_constant:
; X64: ## %bb.0:
@@ -145,12 +198,12 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: and_mask_constant:
-; X32-AVX2: ## %bb.0:
-; X32-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: and_mask_constant:
+; X86-AVX2: ## %bb.0:
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: and_mask_constant:
; X64-AVX2: ## %bb.0:
@@ -158,6 +211,26 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
; X64-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: and_mask_constant:
+; X86-AVX512: ## %bb.0:
+; X86-AVX512-NEXT: movb $105, %al
+; X86-AVX512-NEXT: kmovw %eax, %k1
+; X86-AVX512-NEXT: vptestnmd %ymm0, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: and_mask_constant:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: movb $105, %al
+; X64-AVX512-NEXT: kmovw %eax, %k1
+; X64-AVX512-NEXT: vptestnmd %ymm0, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
+; X64-AVX512-NEXT: retq
%m = icmp eq <8 x i32> %v0, zeroinitializer
%mand = and <8 x i1> %m, <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>
%r = zext <8 x i1> %mand to <8 x i32>
@@ -165,13 +238,13 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
}
define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 {
-; X32-LABEL: two_ands:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: two_ands:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: two_ands:
; X64: ## %bb.0: ## %entry
@@ -181,14 +254,14 @@ define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 {
; X64-NEXT: vandps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: two_ands:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: two_ands:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: two_ands:
; X64-AVX2: ## %bb.0: ## %entry
@@ -198,6 +271,22 @@ define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 {
; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: two_ands:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: two_ands:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -207,16 +296,16 @@ entry:
}
define <8 x i32> @three_ands(<8 x float> %x) {
-; X32-LABEL: three_ands:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: three_ands:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: three_ands:
; X64: ## %bb.0: ## %entry
@@ -229,17 +318,17 @@ define <8 x i32> @three_ands(<8 x float> %x) {
; X64-NEXT: vandps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: three_ands:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: three_ands:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: three_ands:
; X64-AVX2: ## %bb.0: ## %entry
@@ -252,6 +341,26 @@ define <8 x i32> @three_ands(<8 x float> %x) {
; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: three_ands:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: three_ands:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -263,18 +372,18 @@ entry:
}
define <8 x i32> @four_ands(<8 x float> %x) {
-; X32-LABEL: four_ands:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vandps %ymm2, %ymm1, %ymm1
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: four_ands:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vandps %ymm2, %ymm1, %ymm1
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: four_ands:
; X64: ## %bb.0: ## %entry
@@ -289,20 +398,20 @@ define <8 x i32> @four_ands(<8 x float> %x) {
; X64-NEXT: vandps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: four_ands:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: four_ands:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: four_ands:
; X64-AVX2: ## %bb.0: ## %entry
@@ -318,6 +427,28 @@ define <8 x i32> @four_ands(<8 x float> %x) {
; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: four_ands:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: four_ands:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -331,20 +462,20 @@ entry:
}
define <8 x i32> @five_ands(<8 x float> %x) {
-; X32-LABEL: five_ands:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vandps %ymm2, %ymm1, %ymm1
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: five_ands:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vandps %ymm2, %ymm1, %ymm1
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: five_ands:
; X64: ## %bb.0: ## %entry
@@ -361,23 +492,23 @@ define <8 x i32> @five_ands(<8 x float> %x) {
; X64-NEXT: vandps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: five_ands:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: five_ands:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: five_ands:
; X64-AVX2: ## %bb.0: ## %entry
@@ -396,6 +527,30 @@ define <8 x i32> @five_ands(<8 x float> %x) {
; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: five_ands:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: five_ands:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -411,13 +566,13 @@ entry:
}
define <8 x i32> @two_or(<8 x float> %x) {
-; X32-LABEL: two_or:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: two_or:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: two_or:
; X64: ## %bb.0: ## %entry
@@ -427,14 +582,14 @@ define <8 x i32> @two_or(<8 x float> %x) {
; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: two_or:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: two_or:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: two_or:
; X64-AVX2: ## %bb.0: ## %entry
@@ -444,6 +599,24 @@ define <8 x i32> @two_or(<8 x float> %x) {
; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: two_or:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: two_or:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -453,16 +626,16 @@ entry:
}
define <8 x i32> @three_or(<8 x float> %x) {
-; X32-LABEL: three_or:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: three_or:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: three_or:
; X64: ## %bb.0: ## %entry
@@ -475,17 +648,17 @@ define <8 x i32> @three_or(<8 x float> %x) {
; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: three_or:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: three_or:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: three_or:
; X64-AVX2: ## %bb.0: ## %entry
@@ -498,6 +671,30 @@ define <8 x i32> @three_or(<8 x float> %x) {
; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: three_or:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k0
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: three_or:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k0
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -509,18 +706,18 @@ entry:
}
; Function Attrs: norecurse nounwind readnone ssp uwtable
define <8 x i32> @four_or(<8 x float> %x) {
-; X32-LABEL: four_or:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT: vorps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: four_or:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT: vorps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: four_or:
; X64: ## %bb.0: ## %entry
@@ -535,20 +732,20 @@ define <8 x i32> @four_or(<8 x float> %x) {
; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: four_or:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: four_or:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: four_or:
; X64-AVX2: ## %bb.0: ## %entry
@@ -564,6 +761,34 @@ define <8 x i32> @four_or(<8 x float> %x) {
; X64-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: four_or:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k0
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k0
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: four_or:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k0
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k0
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -577,20 +802,20 @@ entry:
}
; Function Attrs: norecurse nounwind readnone ssp uwtable
define <8 x i32> @five_or(<8 x float> %x) {
-; X32-LABEL: five_or:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT: vorps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: five_or:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT: vorps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: five_or:
; X64: ## %bb.0: ## %entry
@@ -607,23 +832,23 @@ define <8 x i32> @five_or(<8 x float> %x) {
; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: five_or:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: five_or:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: five_or:
; X64-AVX2: ## %bb.0: ## %entry
@@ -642,6 +867,38 @@ define <8 x i32> @five_or(<8 x float> %x) {
; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: five_or:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k0
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k0
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k0
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: five_or:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k0
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k0
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k0
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -657,16 +914,16 @@ entry:
}
define <8 x i32> @three_or_and(<8 x float> %x) {
-; X32-LABEL: three_or_and:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: three_or_and:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vorps %ymm1, %ymm0, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: three_or_and:
; X64: ## %bb.0: ## %entry
@@ -679,17 +936,17 @@ define <8 x i32> @three_or_and(<8 x float> %x) {
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: three_or_and:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: three_or_and:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: three_or_and:
; X64-AVX2: ## %bb.0: ## %entry
@@ -702,6 +959,28 @@ define <8 x i32> @three_or_and(<8 x float> %x) {
; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: three_or_and:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: korw %k0, %k1, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: three_or_and:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: korw %k0, %k1, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -713,18 +992,18 @@ entry:
}
define <8 x i32> @four_or_and(<8 x float> %x) {
-; X32-LABEL: four_or_and:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vandps %ymm2, %ymm1, %ymm1
-; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: four_or_and:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vandps %ymm2, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: four_or_and:
; X64: ## %bb.0: ## %entry
@@ -739,20 +1018,20 @@ define <8 x i32> @four_or_and(<8 x float> %x) {
; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: four_or_and:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: four_or_and:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: four_or_and:
; X64-AVX2: ## %bb.0: ## %entry
@@ -768,6 +1047,30 @@ define <8 x i32> @four_or_and(<8 x float> %x) {
; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: four_or_and:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 {%k1}
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: korw %k1, %k0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: four_or_and:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 {%k1}
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: korw %k1, %k0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -781,20 +1084,20 @@ entry:
}
define <8 x i32> @five_or_and(<8 x float> %x) {
-; X32-LABEL: five_or_and:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vorps %ymm1, %ymm2, %ymm1
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: five_or_and:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vorps %ymm1, %ymm2, %ymm1
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: five_or_and:
; X64: ## %bb.0: ## %entry
@@ -811,23 +1114,23 @@ define <8 x i32> @five_or_and(<8 x float> %x) {
; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: five_or_and:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT: vorps %ymm1, %ymm2, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: five_or_and:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vorps %ymm1, %ymm2, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: five_or_and:
; X64-AVX2: ## %bb.0: ## %entry
@@ -846,6 +1149,34 @@ define <8 x i32> @five_or_and(<8 x float> %x) {
; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: five_or_and:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: korw %k0, %k1, %k0
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: korw %k1, %k0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: five_or_and:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: korw %k0, %k1, %k0
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: korw %k1, %k0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -861,18 +1192,18 @@ entry:
}
define <8 x i32> @four_or_and_xor(<8 x float> %x) {
-; X32-LABEL: four_or_and_xor:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %ymm2, %ymm1, %ymm1
-; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: four_or_and_xor:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %ymm2, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: four_or_and_xor:
; X64: ## %bb.0: ## %entry
@@ -887,20 +1218,20 @@ define <8 x i32> @four_or_and_xor(<8 x float> %x) {
; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: four_or_and_xor:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: four_or_and_xor:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: four_or_and_xor:
; X64-AVX2: ## %bb.0: ## %entry
@@ -916,6 +1247,32 @@ define <8 x i32> @four_or_and_xor(<8 x float> %x) {
; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: four_or_and_xor:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: kxorw %k1, %k0, %k0
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: korw %k1, %k0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: four_or_and_xor:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: kxorw %k1, %k0, %k0
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: korw %k1, %k0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -929,20 +1286,20 @@ entry:
}
; Function Attrs: norecurse nounwind readnone ssp uwtable
define <8 x i32> @five_or_and_xor(<8 x float> %x) {
-; X32-LABEL: five_or_and_xor:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm0, %ymm3, %ymm0
-; X32-NEXT: vxorps %ymm0, %ymm2, %ymm0
-; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: five_or_and_xor:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT: vxorps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vandps %ymm0, %ymm3, %ymm0
+; X86-NEXT: vxorps %ymm0, %ymm2, %ymm0
+; X86-NEXT: vorps %ymm1, %ymm0, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: five_or_and_xor:
; X64: ## %bb.0: ## %entry
@@ -959,23 +1316,23 @@ define <8 x i32> @five_or_and_xor(<8 x float> %x) {
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: five_or_and_xor:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT: vxorps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm0
-; X32-AVX2-NEXT: vandps %ymm0, %ymm3, %ymm0
-; X32-AVX2-NEXT: vxorps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: five_or_and_xor:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT: vxorps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm0
+; X86-AVX2-NEXT: vandps %ymm0, %ymm3, %ymm0
+; X86-AVX2-NEXT: vxorps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: five_or_and_xor:
; X64-AVX2: ## %bb.0: ## %entry
@@ -994,6 +1351,36 @@ define <8 x i32> @five_or_and_xor(<8 x float> %x) {
; X64-AVX2-NEXT: vxorps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: five_or_and_xor:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k2
+; X86-AVX512-NEXT: kxorw %k2, %k1, %k1
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2 {%k2}
+; X86-AVX512-NEXT: kxorw %k2, %k1, %k1
+; X86-AVX512-NEXT: korw %k0, %k1, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: five_or_and_xor:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k2
+; X64-AVX512-NEXT: kxorw %k2, %k1, %k1
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2 {%k2}
+; X64-AVX512-NEXT: kxorw %k2, %k1, %k1
+; X64-AVX512-NEXT: korw %k0, %k1, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -1008,22 +1395,22 @@ entry:
ret <8 x i32> %or
}
define <8 x i32> @six_or_and_xor(<8 x float> %x) {
-; X32-LABEL: six_or_and_xor:
-; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
-; X32-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
-; X32-NEXT: vxorps %ymm1, %ymm3, %ymm1
-; X32-NEXT: vxorps %ymm2, %ymm1, %ymm1
-; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT: retl
+; X86-LABEL: six_or_and_xor:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
+; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
+; X86-NEXT: vxorps %ymm1, %ymm3, %ymm1
+; X86-NEXT: vxorps %ymm2, %ymm1, %ymm1
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
;
; X64-LABEL: six_or_and_xor:
; X64: ## %bb.0: ## %entry
@@ -1042,26 +1429,26 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
;
-; X32-AVX2-LABEL: six_or_and_xor:
-; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1
-; X32-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1]
-; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT: retl
+; X86-AVX2-LABEL: six_or_and_xor:
+; X86-AVX2: ## %bb.0: ## %entry
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1
+; X86-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: six_or_and_xor:
; X64-AVX2: ## %bb.0: ## %entry
@@ -1083,6 +1470,38 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: six_or_and_xor:
+; X86-AVX512: ## %bb.0: ## %entry
+; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2
+; X86-AVX512-NEXT: kxorw %k0, %k2, %k0
+; X86-AVX512-NEXT: kxorw %k1, %k0, %k0
+; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT: korw %k1, %k0, %k1
+; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: six_or_and_xor:
+; X64-AVX512: ## %bb.0: ## %entry
+; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2
+; X64-AVX512-NEXT: kxorw %k0, %k2, %k0
+; X64-AVX512-NEXT: kxorw %k1, %k0, %k0
+; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT: korw %k1, %k0, %k1
+; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT: retq
entry:
%cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
%cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
More information about the llvm-commits
mailing list