[llvm] c972e1c - [X86] v8i1-masks.ll - add avx512 test coverage and use X86 check prefix instead of X32

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 21 06:14:40 PST 2023


Author: Simon Pilgrim
Date: 2023-01-21T14:14:31Z
New Revision: c972e1c8b59b144599b47e8a7946ff8e531a2049

URL: https://github.com/llvm/llvm-project/commit/c972e1c8b59b144599b47e8a7946ff8e531a2049
DIFF: https://github.com/llvm/llvm-project/commit/c972e1c8b59b144599b47e8a7946ff8e531a2049.diff

LOG: [X86] v8i1-masks.ll - add avx512 test coverage and use X86 check prefix instead of X32

We try to reserve the X32 check prefix for tests on gnux32 triples, so plain i686 runs should use X86 instead

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/v8i1-masks.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/v8i1-masks.ll b/llvm/test/CodeGen/X86/v8i1-masks.ll
index c2dd3539e71e..212d9764622d 100644
--- a/llvm/test/CodeGen/X86/v8i1-masks.ll
+++ b/llvm/test/CodeGen/X86/v8i1-masks.ll
@@ -1,25 +1,27 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX2
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=X86-AVX512
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=X64-AVX512
 
 define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
-; X32-LABEL: and_masks:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    vmovups (%edx), %ymm0
-; X32-NEXT:    vmovups (%ecx), %ymm1
-; X32-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vmovups (%eax), %ymm2
-; X32-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vandps %ymm1, %ymm0, %ymm0
-; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vmovaps %ymm0, (%eax)
-; X32-NEXT:    vzeroupper
-; X32-NEXT:    retl
+; X86-LABEL: and_masks:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    vmovups (%edx), %ymm0
+; X86-NEXT:    vmovups (%ecx), %ymm1
+; X86-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vmovups (%eax), %ymm2
+; X86-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vmovaps %ymm0, (%eax)
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_masks:
 ; X64:       ## %bb.0:
@@ -34,21 +36,21 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: and_masks:
-; X32-AVX2:       ## %bb.0:
-; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-AVX2-NEXT:    vmovups (%edx), %ymm0
-; X32-AVX2-NEXT:    vmovups (%ecx), %ymm1
-; X32-AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vmovups (%eax), %ymm2
-; X32-AVX2-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vmovdqa %ymm0, (%eax)
-; X32-AVX2-NEXT:    vzeroupper
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: and_masks:
+; X86-AVX2:       ## %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX2-NEXT:    vmovups (%edx), %ymm0
+; X86-AVX2-NEXT:    vmovups (%ecx), %ymm1
+; X86-AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vmovups (%eax), %ymm2
+; X86-AVX2-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vmovdqa %ymm0, (%eax)
+; X86-AVX2-NEXT:    vzeroupper
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: and_masks:
 ; X64-AVX2:       ## %bb.0:
@@ -62,6 +64,33 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
 ; X64-AVX2-NEXT:    vmovdqa %ymm0, (%rax)
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: and_masks:
+; X86-AVX512:       ## %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512-NEXT:    vmovups (%edx), %ymm0
+; X86-AVX512-NEXT:    vcmpgtps (%ecx), %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpgtps (%eax), %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa %ymm0, (%eax)
+; X86-AVX512-NEXT:    vzeroupper
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: and_masks:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vmovups (%rdi), %ymm0
+; X64-AVX512-NEXT:    vcmpgtps (%rdx), %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpgtps (%rsi), %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa %ymm0, (%rax)
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
   %v0 = load <8 x float>, ptr %a, align 16
   %v1 = load <8 x float>, ptr %b, align 16
   %m0 = fcmp olt <8 x float> %v1, %v0
@@ -74,16 +103,16 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
 }
 
 define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
-; X32-LABEL: neg_masks:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovups (%ecx), %ymm0
-; X32-NEXT:    vcmpnltps (%eax), %ymm0, %ymm0
-; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vmovaps %ymm0, (%eax)
-; X32-NEXT:    vzeroupper
-; X32-NEXT:    retl
+; X86-LABEL: neg_masks:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovups (%ecx), %ymm0
+; X86-NEXT:    vcmpnltps (%eax), %ymm0, %ymm0
+; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vmovaps %ymm0, (%eax)
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: neg_masks:
 ; X64:       ## %bb.0:
@@ -94,17 +123,17 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: neg_masks:
-; X32-AVX2:       ## %bb.0:
-; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX2-NEXT:    vmovups (%ecx), %ymm0
-; X32-AVX2-NEXT:    vcmpnltps (%eax), %ymm0, %ymm0
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
-; X32-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vmovaps %ymm0, (%eax)
-; X32-AVX2-NEXT:    vzeroupper
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: neg_masks:
+; X86-AVX2:       ## %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT:    vmovups (%ecx), %ymm0
+; X86-AVX2-NEXT:    vcmpnltps (%eax), %ymm0, %ymm0
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; X86-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vmovaps %ymm0, (%eax)
+; X86-AVX2-NEXT:    vzeroupper
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: neg_masks:
 ; X64-AVX2:       ## %bb.0:
@@ -115,6 +144,30 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
 ; X64-AVX2-NEXT:    vmovaps %ymm0, (%rax)
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: neg_masks:
+; X86-AVX512:       ## %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT:    vmovups (%ecx), %ymm0
+; X86-AVX512-NEXT:    vcmpnltps (%eax), %ymm0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa %ymm0, (%eax)
+; X86-AVX512-NEXT:    vzeroupper
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: neg_masks:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vmovups (%rsi), %ymm0
+; X64-AVX512-NEXT:    vcmpnltps (%rdi), %ymm0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa %ymm0, (%rax)
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
   %v0 = load <8 x float>, ptr %a, align 16
   %v1 = load <8 x float>, ptr %b, align 16
   %m0 = fcmp olt <8 x float> %v1, %v0
@@ -125,15 +178,15 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp {
 }
 
 define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
-; X32-LABEL: and_mask_constant:
-; X32:       ## %bb.0:
-; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X32-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
-; X32-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
-; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: and_mask_constant:
+; X86:       ## %bb.0:
+; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
+; X86-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_mask_constant:
 ; X64:       ## %bb.0:
@@ -145,12 +198,12 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
 ; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: and_mask_constant:
-; X32-AVX2:       ## %bb.0:
-; X32-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X32-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: and_mask_constant:
+; X86-AVX2:       ## %bb.0:
+; X86-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; X86-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: and_mask_constant:
 ; X64-AVX2:       ## %bb.0:
@@ -158,6 +211,26 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: and_mask_constant:
+; X86-AVX512:       ## %bb.0:
+; X86-AVX512-NEXT:    movb $105, %al
+; X86-AVX512-NEXT:    kmovw %eax, %k1
+; X86-AVX512-NEXT:    vptestnmd %ymm0, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: and_mask_constant:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    movb $105, %al
+; X64-AVX512-NEXT:    kmovw %eax, %k1
+; X64-AVX512-NEXT:    vptestnmd %ymm0, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
+; X64-AVX512-NEXT:    retq
   %m = icmp eq <8 x i32> %v0, zeroinitializer
   %mand = and <8 x i1> %m, <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>
   %r = zext <8 x i1> %mand to <8 x i32>
@@ -165,13 +238,13 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
 }
 
 define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 {
-; X32-LABEL: two_ands:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: two_ands:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: two_ands:
 ; X64:       ## %bb.0: ## %entry
@@ -181,14 +254,14 @@ define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 {
 ; X64-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: two_ands:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: two_ands:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: two_ands:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -198,6 +271,22 @@ define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 {
 ; X64-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: two_ands:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: two_ands:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -207,16 +296,16 @@ entry:
 }
 
 define <8 x i32> @three_ands(<8 x float> %x) {
-; X32-LABEL: three_ands:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: three_ands:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: three_ands:
 ; X64:       ## %bb.0: ## %entry
@@ -229,17 +318,17 @@ define <8 x i32> @three_ands(<8 x float> %x) {
 ; X64-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: three_ands:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: three_ands:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: three_ands:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -252,6 +341,26 @@ define <8 x i32> @three_ands(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: three_ands:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: three_ands:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -263,18 +372,18 @@ entry:
 }
 
 define <8 x i32> @four_ands(<8 x float> %x) {
-; X32-LABEL: four_ands:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: four_ands:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: four_ands:
 ; X64:       ## %bb.0: ## %entry
@@ -289,20 +398,20 @@ define <8 x i32> @four_ands(<8 x float> %x) {
 ; X64-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: four_ands:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: four_ands:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: four_ands:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -318,6 +427,28 @@ define <8 x i32> @four_ands(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: four_ands:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: four_ands:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -331,20 +462,20 @@ entry:
 }
 
 define <8 x i32> @five_ands(<8 x float> %x) {
-; X32-LABEL: five_ands:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: five_ands:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: five_ands:
 ; X64:       ## %bb.0: ## %entry
@@ -361,23 +492,23 @@ define <8 x i32> @five_ands(<8 x float> %x) {
 ; X64-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: five_ands:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: five_ands:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: five_ands:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -396,6 +527,30 @@ define <8 x i32> @five_ands(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: five_ands:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: five_ands:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -411,13 +566,13 @@ entry:
 }
 
 define <8 x i32> @two_or(<8 x float> %x) {
-; X32-LABEL: two_or:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: two_or:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: two_or:
 ; X64:       ## %bb.0: ## %entry
@@ -427,14 +582,14 @@ define <8 x i32> @two_or(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: two_or:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: two_or:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: two_or:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -444,6 +599,24 @@ define <8 x i32> @two_or(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: two_or:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: two_or:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -453,16 +626,16 @@ entry:
 }
 
 define <8 x i32> @three_or(<8 x float> %x) {
-; X32-LABEL: three_or:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: three_or:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: three_or:
 ; X64:       ## %bb.0: ## %entry
@@ -475,17 +648,17 @@ define <8 x i32> @three_or(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: three_or:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: three_or:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: three_or:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -498,6 +671,30 @@ define <8 x i32> @three_or(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: three_or:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k0
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: three_or:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k0
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -509,18 +706,18 @@ entry:
 }
 ; Function Attrs: norecurse nounwind readnone ssp uwtable
 define <8 x i32> @four_or(<8 x float> %x) {
-; X32-LABEL: four_or:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT:    vorps %ymm3, %ymm2, %ymm2
-; X32-NEXT:    vorps %ymm2, %ymm1, %ymm1
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: four_or:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT:    vorps %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vorps %ymm2, %ymm1, %ymm1
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: four_or:
 ; X64:       ## %bb.0: ## %entry
@@ -535,20 +732,20 @@ define <8 x i32> @four_or(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: four_or:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: four_or:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: four_or:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -564,6 +761,34 @@ define <8 x i32> @four_or(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm2, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: four_or:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k0
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k0
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: four_or:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k0
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k0
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -577,20 +802,20 @@ entry:
 }
 ; Function Attrs: norecurse nounwind readnone ssp uwtable
 define <8 x i32> @five_or(<8 x float> %x) {
-; X32-LABEL: five_or:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT:    vorps %ymm3, %ymm2, %ymm2
-; X32-NEXT:    vorps %ymm2, %ymm1, %ymm1
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: five_or:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT:    vorps %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vorps %ymm2, %ymm1, %ymm1
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: five_or:
 ; X64:       ## %bb.0: ## %entry
@@ -607,23 +832,23 @@ define <8 x i32> @five_or(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: five_or:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vorps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: five_or:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vorps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: five_or:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -642,6 +867,38 @@ define <8 x i32> @five_or(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: five_or:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k0
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k0
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k0
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: five_or:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k0
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k0
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k0
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -657,16 +914,16 @@ entry:
 }
 
 define <8 x i32> @three_or_and(<8 x float> %x) {
-; X32-LABEL: three_or_and:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vorps %ymm1, %ymm0, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: three_or_and:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: three_or_and:
 ; X64:       ## %bb.0: ## %entry
@@ -679,17 +936,17 @@ define <8 x i32> @three_or_and(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: three_or_and:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: three_or_and:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: three_or_and:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -702,6 +959,28 @@ define <8 x i32> @three_or_and(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: three_or_and:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    korw %k0, %k1, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: three_or_and:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    korw %k0, %k1, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -713,18 +992,18 @@ entry:
 }
 
 define <8 x i32> @four_or_and(<8 x float> %x) {
-; X32-LABEL: four_or_and:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: four_or_and:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; X86-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: four_or_and:
 ; X64:       ## %bb.0: ## %entry
@@ -739,20 +1018,20 @@ define <8 x i32> @four_or_and(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: four_or_and:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: four_or_and:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: four_or_and:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -768,6 +1047,30 @@ define <8 x i32> @four_or_and(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: four_or_and:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 {%k1}
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    korw %k1, %k0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: four_or_and:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 {%k1}
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    korw %k1, %k0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -781,20 +1084,20 @@ entry:
 }
 
 define <8 x i32> @five_or_and(<8 x float> %x) {
-; X32-LABEL: five_or_and:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT:    vorps %ymm1, %ymm2, %ymm1
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: five_or_and:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vorps %ymm1, %ymm2, %ymm1
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: five_or_and:
 ; X64:       ## %bb.0: ## %entry
@@ -811,23 +1114,23 @@ define <8 x i32> @five_or_and(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: five_or_and:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vorps %ymm1, %ymm2, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: five_or_and:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vorps %ymm1, %ymm2, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: five_or_and:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -846,6 +1149,34 @@ define <8 x i32> @five_or_and(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: five_or_and:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    korw %k0, %k1, %k0
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    korw %k1, %k0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: five_or_and:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    korw %k0, %k1, %k0
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    korw %k1, %k0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -861,18 +1192,18 @@ entry:
 }
 
 define <8 x i32> @four_or_and_xor(<8 x float> %x) {
-; X32-LABEL: four_or_and_xor:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %ymm2, %ymm1, %ymm1
-; X32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: four_or_and_xor:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %ymm2, %ymm1, %ymm1
+; X86-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: four_or_and_xor:
 ; X64:       ## %bb.0: ## %entry
@@ -887,20 +1218,20 @@ define <8 x i32> @four_or_and_xor(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: four_or_and_xor:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vxorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: four_or_and_xor:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vxorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: four_or_and_xor:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -916,6 +1247,32 @@ define <8 x i32> @four_or_and_xor(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vandps %ymm0, %ymm2, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: four_or_and_xor:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    kxorw %k1, %k0, %k0
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    korw %k1, %k0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: four_or_and_xor:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    kxorw %k1, %k0, %k0
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    korw %k1, %k0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -929,20 +1286,20 @@ entry:
 }
 ; Function Attrs: norecurse nounwind readnone ssp uwtable
 define <8 x i32> @five_or_and_xor(<8 x float> %x) {
-; X32-LABEL: five_or_and_xor:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT:    vxorps %ymm3, %ymm2, %ymm2
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vandps %ymm0, %ymm3, %ymm0
-; X32-NEXT:    vxorps %ymm0, %ymm2, %ymm0
-; X32-NEXT:    vorps %ymm1, %ymm0, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: five_or_and_xor:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT:    vxorps %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vandps %ymm0, %ymm3, %ymm0
+; X86-NEXT:    vxorps %ymm0, %ymm2, %ymm0
+; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: five_or_and_xor:
 ; X64:       ## %bb.0: ## %entry
@@ -959,23 +1316,23 @@ define <8 x i32> @five_or_and_xor(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: five_or_and_xor:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vxorps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm4, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vandps %ymm0, %ymm3, %ymm0
-; X32-AVX2-NEXT:    vxorps %ymm0, %ymm2, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: five_or_and_xor:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vxorps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm4, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vandps %ymm0, %ymm3, %ymm0
+; X86-AVX2-NEXT:    vxorps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: five_or_and_xor:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -994,6 +1351,36 @@ define <8 x i32> @five_or_and_xor(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vxorps %ymm0, %ymm2, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: five_or_and_xor:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k2
+; X86-AVX512-NEXT:    kxorw %k2, %k1, %k1
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2 {%k2}
+; X86-AVX512-NEXT:    kxorw %k2, %k1, %k1
+; X86-AVX512-NEXT:    korw %k0, %k1, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: five_or_and_xor:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k2
+; X64-AVX512-NEXT:    kxorw %k2, %k1, %k1
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2 {%k2}
+; X64-AVX512-NEXT:    kxorw %k2, %k1, %k1
+; X64-AVX512-NEXT:    korw %k0, %k1, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
@@ -1008,22 +1395,22 @@ entry:
   ret <8 x i32> %or
 }
 define <8 x i32> @six_or_and_xor(<8 x float> %x) {
-; X32-LABEL: six_or_and_xor:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
-; X32-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
-; X32-NEXT:    vxorps %ymm1, %ymm3, %ymm1
-; X32-NEXT:    vxorps %ymm2, %ymm1, %ymm1
-; X32-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X32-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: six_or_and_xor:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
+; X86-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
+; X86-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
+; X86-NEXT:    vxorps %ymm1, %ymm3, %ymm1
+; X86-NEXT:    vxorps %ymm2, %ymm1, %ymm1
+; X86-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: six_or_and_xor:
 ; X64:       ## %bb.0: ## %entry
@@ -1042,26 +1429,26 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
 ; X64-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
-; X32-AVX2-LABEL: six_or_and_xor:
-; X32-AVX2:       ## %bb.0: ## %entry
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vxorps %ymm1, %ymm3, %ymm1
-; X32-AVX2-NEXT:    vxorps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1]
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX2-LABEL: six_or_and_xor:
+; X86-AVX2:       ## %bb.0: ## %entry
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; X86-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X86-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vxorps %ymm1, %ymm3, %ymm1
+; X86-AVX2-NEXT:    vxorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1]
+; X86-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: six_or_and_xor:
 ; X64-AVX2:       ## %bb.0: ## %entry
@@ -1083,6 +1470,38 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
 ; X64-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT:    retq
+;
+; X86-AVX512-LABEL: six_or_and_xor:
+; X86-AVX512:       ## %bb.0: ## %entry
+; X86-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
+; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X86-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2
+; X86-AVX512-NEXT:    kxorw %k0, %k2, %k0
+; X86-AVX512-NEXT:    kxorw %k1, %k0, %k0
+; X86-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1
+; X86-AVX512-NEXT:    korw %k1, %k0, %k1
+; X86-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX512-LABEL: six_or_and_xor:
+; X64-AVX512:       ## %bb.0: ## %entry
+; X64-AVX512-NEXT:    vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k1
+; X64-AVX512-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2
+; X64-AVX512-NEXT:    kxorw %k0, %k2, %k0
+; X64-AVX512-NEXT:    kxorw %k1, %k0, %k0
+; X64-AVX512-NEXT:    vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
+; X64-AVX512-NEXT:    korw %k1, %k0, %k1
+; X64-AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-AVX512-NEXT:    retq
 entry:
   %cmp = fcmp oge <8 x float> %x, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
   %cmp1 = fcmp olt <8 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>


        


More information about the llvm-commits mailing list