[llvm] [X86] Fix opmasks handling for AVX10.1-256 (PR #73074)
Evgenii Kudriashov via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 5 15:08:29 PST 2023
https://github.com/e-kud updated https://github.com/llvm/llvm-project/pull/73074
From 44757e28781ee7ff48fe1d9045b90665183ad2a0 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 7 Nov 2023 18:27:23 -0800
Subject: [PATCH] [X86] Improve opmasks handling for AVX10.1-256
Quadword opmask instructions are only supported on processors that
support 512-bit vectors. AVX10.1-256 provides AVX512BW functionality
without 512-bit vectors, so selection of the quadword opmask forms
must additionally be guarded on hasEVEX512().
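
As a rough illustration, the legality rule this patch enforces can be
summarized by the following minimal C++ sketch (hypothetical names;
MaskOpWidth and widestLegalMaskOp are not part of the LLVM API):

  // Widest opmask instruction width that may be selected for a target.
  // AVX512F alone provides word (16-bit) opmask instructions; AVX512BW
  // adds doubleword and quadword forms, but the quadword forms also
  // require 512-bit vector support (EVEX512). AVX10.1-256 has BWI
  // functionality without EVEX512, so it stops at doubleword.
  enum MaskOpWidth { MaskW16, MaskW32, MaskW64 };

  MaskOpWidth widestLegalMaskOp(bool HasBWI, bool HasEVEX512) {
    if (!HasBWI)
      return MaskW16; // e.g. KMOVW, KORW
    if (!HasEVEX512)
      return MaskW32; // e.g. KMOVD, KORD -- the AVX10.1-256 ceiling
    return MaskW64;   // e.g. KMOVQ, KORQ
  }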
---
llvm/lib/Target/X86/X86DomainReassignment.cpp | 35 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +-
llvm/lib/Target/X86/X86InstrInfo.cpp | 16 +-
llvm/lib/Target/X86/X86Subtarget.h | 3 +-
llvm/test/CodeGen/X86/avx512-mask-op.ll | 772 ++++++++++++++-
llvm/test/CodeGen/X86/avx512-vec-cmp.ll | 899 +++++++++++++++---
llvm/test/CodeGen/X86/avx512bw-mask-op.ll | 105 +-
llvm/test/CodeGen/X86/kshift.ll | 301 ++++++
llvm/test/CodeGen/X86/movmsk-cmp.ll | 798 ++++++++++++++--
llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 3 +-
10 files changed, 2614 insertions(+), 323 deletions(-)
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index bdd86e48fa5438..70f3be0e12ece8 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -662,37 +662,30 @@ void X86DomainReassignment::initConverters() {
if (STI->hasBWI()) {
createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
- createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
-
createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
- createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
-
createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
- createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
-
createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
- createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
-
createReplacer(X86::SHL32ri, X86::KSHIFTLDri);
- createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
-
createReplacer(X86::ADD32rr, X86::KADDDrr);
- createReplacer(X86::ADD64rr, X86::KADDQrr);
-
createReplacer(X86::NOT32r, X86::KNOTDrr);
- createReplacer(X86::NOT64r, X86::KNOTQrr);
-
createReplacer(X86::OR32rr, X86::KORDrr);
- createReplacer(X86::OR64rr, X86::KORQrr);
-
createReplacer(X86::AND32rr, X86::KANDDrr);
- createReplacer(X86::AND64rr, X86::KANDQrr);
-
createReplacer(X86::ANDN32rr, X86::KANDNDrr);
- createReplacer(X86::ANDN64rr, X86::KANDNQrr);
-
createReplacer(X86::XOR32rr, X86::KXORDrr);
- createReplacer(X86::XOR64rr, X86::KXORQrr);
+
+ if (STI->hasEVEX512()) {
+ createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
+ createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
+ createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
+ createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
+ createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
+ createReplacer(X86::ADD64rr, X86::KADDQrr);
+ createReplacer(X86::NOT64r, X86::KNOTQrr);
+ createReplacer(X86::OR64rr, X86::KORQrr);
+ createReplacer(X86::AND64rr, X86::KANDQrr);
+ createReplacer(X86::ANDN64rr, X86::KANDNQrr);
+ createReplacer(X86::XOR64rr, X86::KXORQrr);
+ }
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4fca5afb46dd2b..0c2eaa7cf5bf51 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2058,9 +2058,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// AVX512BW..
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
- addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
+ if (Subtarget.hasEVEX512())
+ addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
+ if (VT == MVT::v64i1 && !Subtarget.hasEVEX512())
+ continue;
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ea3bf1f101c1e0..254ffe7818c9bb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3996,7 +3996,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// anyone.
if (X86::VK16RegClass.contains(SrcReg)) {
if (X86::GR64RegClass.contains(DestReg)) {
- assert(Subtarget.hasBWI());
+ assert(Subtarget.hasBWI() && Subtarget.hasEVEX512() &&
+ "KMOVQ requires BWI with EVEX512");
return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
}
if (X86::GR32RegClass.contains(DestReg))
@@ -4011,7 +4012,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// anyone.
if (X86::VK16RegClass.contains(DestReg)) {
if (X86::GR64RegClass.contains(SrcReg)) {
- assert(Subtarget.hasBWI());
+ assert(Subtarget.hasBWI() && Subtarget.hasEVEX512() &&
+ "KMOVQ requires BWI with EVEX512");
return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
}
if (X86::GR32RegClass.contains(SrcReg))
@@ -4125,8 +4127,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// All KMASK RegClasses hold the same k registers, can be tested against
// anyone.
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
- Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
- : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
+ Opc = Subtarget.hasBWI() && Subtarget.hasEVEX512()
+ ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
+ : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
@@ -4247,7 +4250,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
if (X86::RFP64RegClass.hasSubClassEq(RC))
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
if (X86::VK64RegClass.hasSubClassEq(RC)) {
- assert(STI.hasBWI() && "KMOVQ requires BWI");
+ assert(STI.hasBWI() && STI.hasEVEX512() &&
+ "KMOVQ requires BWI with EVEX512");
return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
: (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
}
@@ -10523,7 +10527,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
return;
// KXOR is safe to use because it doesn't affect flags.
- unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
+ unsigned Op = ST.hasBWI() && ST.hasEVEX512() ? X86::KXORQrr : X86::KXORWrr;
BuildMI(MBB, Iter, DL, get(Op), Reg)
.addReg(Reg, RegState::Undef)
.addReg(Reg, RegState::Undef);
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index a458b5f9ec8fbb..47d24f4be58a3e 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -244,7 +244,8 @@ class X86Subtarget final : public X86GenSubtargetInfo {
// TODO: Currently we're always allowing widening on CPUs without VLX,
// because for many cases we don't have a better option.
bool canExtendTo512DQ() const {
- return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
+ return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512) &&
+ hasEVEX512();
}
bool canExtendTo512BW() const {
return hasBWI() && canExtendTo512DQ();
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 9e689341f7b88e..d2246ee2a33885 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,8 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,KNL
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,EVEX512,KNL
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,EVEX512,SKX
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,EVEX512,AVX512BW
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,EVEX512,AVX512DQ
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=CHECK,AVX10-256
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
@@ -131,6 +132,13 @@ define void @mask8_mem(ptr %ptr) {
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: mask8_mem:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: knotb %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: mask8_mem:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -205,6 +213,15 @@ define i16 @mand16_mem(ptr %x, ptr %y) {
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: mand16_mem:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovw (%rdi), %k0
+; AVX10-256-NEXT: kmovw (%rsi), %k1
+; AVX10-256-NEXT: korw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: mand16_mem:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -257,6 +274,14 @@ define i8 @shuf_test1(i16 %v) nounwind {
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: shuf_test1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: kshiftrw $8, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: shuf_test1:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -304,6 +329,15 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: zext_test1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: andl $1, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: zext_test1:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
@@ -359,6 +393,16 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: zext_test2:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: andl $1, %eax
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: zext_test2:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
@@ -415,6 +459,16 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: zext_test3:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: andb $1, %al
+; AVX10-256-NEXT: ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: zext_test3:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
@@ -506,6 +560,14 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test4:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
+; AVX10-256-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1}
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test4:
; X86: ## %bb.0:
; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
@@ -567,6 +629,13 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test5:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
+; AVX10-256-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1}
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test5:
; X86: ## %bb.0:
; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
@@ -645,6 +714,14 @@ define void @test7(<8 x i1> %mask) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test7:
+; AVX10-256: ## %bb.0: ## %allocas
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: orb $85, %al
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test7:
; X86: ## %bb.0: ## %allocas
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
@@ -732,6 +809,24 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test8:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: jg LBB17_1
+; AVX10-256-NEXT: ## %bb.2:
+; AVX10-256-NEXT: kxorw %k0, %k0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB17_1:
+; AVX10-256-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX10-256-NEXT: vpcmpgtd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vpcmpgtd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test8:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -821,6 +916,20 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test9:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: jg LBB18_1
+; AVX10-256-NEXT: ## %bb.2:
+; AVX10-256-NEXT: vpsllw $7, %xmm1, %xmm0
+; AVX10-256-NEXT: jmp LBB18_3
+; AVX10-256-NEXT: LBB18_1:
+; AVX10-256-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT: LBB18_3:
+; AVX10-256-NEXT: vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test9:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -907,6 +1016,20 @@ define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test10:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: jg LBB19_1
+; AVX10-256-NEXT: ## %bb.2:
+; AVX10-256-NEXT: vpsllw $15, %xmm1, %xmm0
+; AVX10-256-NEXT: jmp LBB19_3
+; AVX10-256-NEXT: LBB19_1:
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: LBB19_3:
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test10:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -989,6 +1112,20 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test11:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: jg LBB20_1
+; AVX10-256-NEXT: ## %bb.2:
+; AVX10-256-NEXT: vpslld $31, %xmm1, %xmm0
+; AVX10-256-NEXT: jmp LBB20_3
+; AVX10-256-NEXT: LBB20_1:
+; AVX10-256-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT: LBB20_3:
+; AVX10-256-NEXT: vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test11:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1120,6 +1257,16 @@ define <16 x i1> @test15(i32 %x, i32 %y) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test15:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: movl $21845, %eax ## imm = 0x5555
+; AVX10-256-NEXT: movl $1, %ecx
+; AVX10-256-NEXT: cmovgl %eax, %ecx
+; AVX10-256-NEXT: kmovd %ecx, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test15:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1231,6 +1378,23 @@ define <64 x i8> @test16(i64 %x) {
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test16:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: shrq $32, %rdi
+; AVX10-256-NEXT: kmovd %edi, %k1
+; AVX10-256-NEXT: movl $-33, %eax
+; AVX10-256-NEXT: kmovd %eax, %k2
+; AVX10-256-NEXT: kandd %k2, %k0, %k0
+; AVX10-256-NEXT: movb $1, %al
+; AVX10-256-NEXT: kmovd %eax, %k2
+; AVX10-256-NEXT: kshiftld $31, %k2, %k2
+; AVX10-256-NEXT: kshiftrd $26, %k2, %k2
+; AVX10-256-NEXT: kord %k2, %k0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test16:
; X86: ## %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
@@ -1350,6 +1514,24 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test17:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: shrq $32, %rdi
+; AVX10-256-NEXT: kmovd %edi, %k1
+; AVX10-256-NEXT: cmpl %edx, %esi
+; AVX10-256-NEXT: setg %al
+; AVX10-256-NEXT: movl $-33, %ecx
+; AVX10-256-NEXT: kmovd %ecx, %k2
+; AVX10-256-NEXT: kandd %k2, %k0, %k0
+; AVX10-256-NEXT: kmovd %eax, %k2
+; AVX10-256-NEXT: kshiftld $31, %k2, %k2
+; AVX10-256-NEXT: kshiftrd $26, %k2, %k2
+; AVX10-256-NEXT: kord %k2, %k0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test17:
; X86: ## %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
@@ -1455,6 +1637,24 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test18:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: kmovd %esi, %k1
+; AVX10-256-NEXT: kshiftrw $8, %k1, %k2
+; AVX10-256-NEXT: kshiftrw $9, %k1, %k1
+; AVX10-256-NEXT: movb $-65, %al
+; AVX10-256-NEXT: kmovd %eax, %k3
+; AVX10-256-NEXT: kandb %k3, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $6, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $7, %k2, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test18:
; X86: ## %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
@@ -1521,6 +1721,15 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; AVX512DQ-NEXT: vpandq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test21:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %ymm2, %ymm2
+; AVX10-256-NEXT: vpmovb2m %ymm2, %k1
+; AVX10-256-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z}
+; AVX10-256-NEXT: kshiftrd $16, %k1, %k1
+; AVX10-256-NEXT: vmovdqu16 %ymm1, %ymm1 {%k1} {z}
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test21:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %ymm1, %ymm1
@@ -1571,6 +1780,13 @@ define void @test22(<4 x i1> %a, ptr %addr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test22:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test22:
; X86: ## %bb.0:
; X86-NEXT: vpslld $31, %xmm0, %xmm0
@@ -1622,6 +1838,13 @@ define void @test23(<2 x i1> %a, ptr %addr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test23:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllq $63, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovq2m %xmm0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test23:
; X86: ## %bb.0:
; X86-NEXT: vpsllq $63, %xmm0, %xmm0
@@ -1672,6 +1895,15 @@ define void @store_v1i1(<1 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: kmovb %k0, (%rsi)
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v1i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: knotw %k0, %k0
+; AVX10-256-NEXT: kshiftlb $7, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $7, %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rsi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v1i1:
; X86: ## %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
@@ -1730,6 +1962,16 @@ define void @store_v2i1(<2 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v2i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllq $63, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovq2m %xmm0, %k0
+; AVX10-256-NEXT: knotw %k0, %k0
+; AVX10-256-NEXT: kshiftlb $6, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $6, %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v2i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllq $63, %xmm0, %xmm0
@@ -1789,6 +2031,16 @@ define void @store_v4i1(<4 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v4i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT: knotw %k0, %k0
+; AVX10-256-NEXT: kshiftlb $4, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $4, %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v4i1:
; X86: ## %bb.0:
; X86-NEXT: vpslld $31, %xmm0, %xmm0
@@ -1843,6 +2095,14 @@ define void @store_v8i1(<8 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v8i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: knotb %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v8i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
@@ -1893,6 +2153,14 @@ define void @store_v16i1(<16 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v16i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT: knotw %k0, %k0
+; AVX10-256-NEXT: kmovw %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v16i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %xmm0, %xmm0
@@ -2008,6 +2276,12 @@ define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_build_vec_v32i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_build_vec_v32i1:
; X86: ## %bb.0:
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
@@ -2041,6 +2315,12 @@ define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_build_vec_v32i1_optsize:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_build_vec_v32i1_optsize:
; X86: ## %bb.0:
; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
@@ -2076,6 +2356,12 @@ define <32 x i16> @test_build_vec_v32i1_pgso(<32 x i16> %x) !prof !14 {
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_build_vec_v32i1_pgso:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_build_vec_v32i1_pgso:
; X86: ## %bb.0:
; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
@@ -2107,6 +2393,12 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_build_vec_v64i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_build_vec_v64i1:
; X86: ## %bb.0:
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
@@ -2182,6 +2474,31 @@ define void @ktest_1(<8 x double> %in, ptr %base) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi), %ymm0, %k1
+; AVX10-256-NEXT: vcmpgtpd 32(%rdi), %ymm1, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k0
+; AVX10-256-NEXT: korb %k0, %k1, %k0
+; AVX10-256-NEXT: vmovupd 40(%rdi), %ymm2 {%k2} {z}
+; AVX10-256-NEXT: vmovupd 8(%rdi), %ymm3 {%k1} {z}
+; AVX10-256-NEXT: vcmpltpd %ymm3, %ymm0, %k1
+; AVX10-256-NEXT: vcmpltpd %ymm2, %ymm1, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: ktestb %k1, %k0
+; AVX10-256-NEXT: je LBB44_2
+; AVX10-256-NEXT: ## %bb.1: ## %L1
+; AVX10-256-NEXT: vmovapd %ymm0, (%rdi)
+; AVX10-256-NEXT: vmovapd %ymm1, 32(%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB44_2: ## %L2
+; AVX10-256-NEXT: vmovapd %ymm0, 8(%rdi)
+; AVX10-256-NEXT: vmovapd %ymm1, 40(%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2316,6 +2633,43 @@ define void @ktest_2(<32 x float> %in, ptr %base) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_2:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi), %ymm0, %k1
+; AVX10-256-NEXT: vcmpgtps 32(%rdi), %ymm1, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k0
+; AVX10-256-NEXT: vcmpgtps 64(%rdi), %ymm2, %k3
+; AVX10-256-NEXT: vcmpgtps 96(%rdi), %ymm3, %k4
+; AVX10-256-NEXT: kunpckbw %k3, %k4, %k5
+; AVX10-256-NEXT: kunpckwd %k0, %k5, %k0
+; AVX10-256-NEXT: vmovups 100(%rdi), %ymm4 {%k4} {z}
+; AVX10-256-NEXT: vmovups 68(%rdi), %ymm5 {%k3} {z}
+; AVX10-256-NEXT: vmovups 36(%rdi), %ymm6 {%k2} {z}
+; AVX10-256-NEXT: vmovups 4(%rdi), %ymm7 {%k1} {z}
+; AVX10-256-NEXT: vcmpltps %ymm7, %ymm0, %k1
+; AVX10-256-NEXT: vcmpltps %ymm6, %ymm1, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: vcmpltps %ymm5, %ymm2, %k2
+; AVX10-256-NEXT: vcmpltps %ymm4, %ymm3, %k3
+; AVX10-256-NEXT: kunpckbw %k2, %k3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kortestd %k1, %k0
+; AVX10-256-NEXT: je LBB45_2
+; AVX10-256-NEXT: ## %bb.1: ## %L1
+; AVX10-256-NEXT: vmovaps %ymm0, (%rdi)
+; AVX10-256-NEXT: vmovaps %ymm1, 32(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm2, 64(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm3, 96(%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB45_2: ## %L2
+; AVX10-256-NEXT: vmovaps %ymm0, 4(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm1, 36(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm2, 68(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm3, 100(%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_2:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2390,6 +2744,14 @@ define <8 x i64> @load_8i1(ptr %a) {
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_8i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: vpmovm2q %k0, %ymm0
+; AVX10-256-NEXT: kshiftrb $4, %k0, %k0
+; AVX10-256-NEXT: vpmovm2q %k0, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_8i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2426,6 +2788,14 @@ define <16 x i32> @load_16i1(ptr %a) {
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_16i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: kmovb 1(%rdi), %k1
+; AVX10-256-NEXT: vpmovm2d %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2d %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_16i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2472,6 +2842,12 @@ define <2 x i16> @load_2i1(ptr %a) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_2i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_2i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2518,6 +2894,12 @@ define <4 x i16> @load_4i1(ptr %a) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_4i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_4i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2564,6 +2946,14 @@ define <32 x i16> @load_32i1(ptr %a) {
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_32i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovw (%rdi), %k0
+; AVX10-256-NEXT: kmovw 2(%rdi), %k1
+; AVX10-256-NEXT: vpmovm2w %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2w %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_32i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2626,6 +3016,14 @@ define <64 x i8> @load_64i1(ptr %a) {
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_64i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd (%rdi), %k0
+; AVX10-256-NEXT: kmovd 4(%rdi), %k1
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_64i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2673,6 +3071,13 @@ define void @store_8i1(ptr %a, <8 x i1> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_8i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_8i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
@@ -2720,6 +3125,13 @@ define void @store_8i1_1(ptr %a, <8 x i16> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_8i1_1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_8i1_1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2766,6 +3178,13 @@ define void @store_16i1(ptr %a, <16 x i1> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_16i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT: kmovw %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_16i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %xmm0, %xmm0
@@ -2822,6 +3241,14 @@ define void @store_32i1(ptr %a, <32 x i1> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_32i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: kmovd %k0, (%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_32i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %ymm0, %ymm0
@@ -2879,6 +3306,17 @@ define void @store_32i1_1(ptr %a, <32 x i16> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_32i1_1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: vpsllw $15, %ymm1, %ymm0
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kmovd %k0, (%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_32i1_1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -3787,6 +4225,17 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_64i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: vpsllw $7, %ymm1, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k1
+; AVX10-256-NEXT: kmovd %k1, 4(%rdi)
+; AVX10-256-NEXT: kmovd %k0, (%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_64i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %zmm0, %zmm0
@@ -3834,6 +4283,14 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_bitcast_v8i1_zext:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kmovb %k0, %eax
+; AVX10-256-NEXT: addl %eax, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_bitcast_v8i1_zext:
; X86: ## %bb.0:
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
@@ -3850,13 +4307,23 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
}
define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
-; CHECK-LABEL: test_bitcast_v16i1_zext:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; EVEX512-LABEL: test_bitcast_v16i1_zext:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT: kmovw %k0, %eax
+; EVEX512-NEXT: addl %eax, %eax
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: test_bitcast_v16i1_zext:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kmovw %k0, %eax
+; AVX10-256-NEXT: addl %eax, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
;
; X86-LABEL: test_bitcast_v16i1_zext:
; X86: ## %bb.0:
@@ -4066,6 +4533,27 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_signed:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpor %ymm3, %ymm1, %ymm1
+; AVX10-256-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: testw %ax, %ax
+; AVX10-256-NEXT: jle LBB66_1
+; AVX10-256-NEXT: ## %bb.2: ## %bb.2
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB66_1: ## %bb.1
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_signed:
; X86: ## %bb.0:
; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
@@ -4099,21 +4587,40 @@ declare void @foo()
; Make sure we can use the ZF/CF flags from kortest to check for all ones.
define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
-; CHECK-LABEL: ktest_allones:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0
-; CHECK-NEXT: kortestw %k0, %k0
-; CHECK-NEXT: je LBB67_2
-; CHECK-NEXT: ## %bb.1: ## %bb.1
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: callq _foo
-; CHECK-NEXT: addq $8, %rsp
-; CHECK-NEXT: LBB67_2: ## %bb.2
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; EVEX512-LABEL: ktest_allones:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpord %zmm1, %zmm0, %zmm0
+; EVEX512-NEXT: vptestmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: je LBB67_2
+; EVEX512-NEXT: ## %bb.1: ## %bb.1
+; EVEX512-NEXT: pushq %rax
+; EVEX512-NEXT: .cfi_def_cfa_offset 16
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: callq _foo
+; EVEX512-NEXT: addq $8, %rsp
+; EVEX512-NEXT: LBB67_2: ## %bb.2
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: ktest_allones:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpor %ymm3, %ymm1, %ymm1
+; AVX10-256-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: jb LBB67_2
+; AVX10-256-NEXT: ## %bb.1: ## %bb.1
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: LBB67_2: ## %bb.2
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
;
; X86-LABEL: ktest_allones:
; X86: ## %bb.0:
@@ -4182,6 +4689,14 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: mask_widening:
+; AVX10-256: ## %bb.0: ## %entry
+; AVX10-256-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
+; AVX10-256-NEXT: vpblendmd %ymm6, %ymm4, %ymm0 {%k1}
+; AVX10-256-NEXT: kshiftrw $8, %k1, %k1
+; AVX10-256-NEXT: vpblendmd %ymm7, %ymm5, %ymm1 {%k1}
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: mask_widening:
; X86: ## %bb.0: ## %entry
; X86-NEXT: pushl %ebp
@@ -4239,6 +4754,12 @@ define void @store_v128i1_constant(ptr %R) {
; AVX512DQ-NEXT: vmovaps %xmm0, (%rdi)
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v128i1_constant:
+; AVX10-256: ## %bb.0: ## %entry
+; AVX10-256-NEXT: vmovaps {{.*#+}} xmm0 = [4294963197,3758096251,4294959101,3221225403]
+; AVX10-256-NEXT: vmovaps %xmm0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v128i1_constant:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4302,13 +4823,24 @@ entry:
; Make sure we bring the -1 constant into the mask domain.
define void @mask_not_cast(ptr, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
-; CHECK-LABEL: mask_not_cast:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
-; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
-; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; EVEX512-LABEL: mask_not_cast:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
+; EVEX512-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
+; EVEX512-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: mask_not_cast:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleud %ymm6, %ymm4, %k1
+; AVX10-256-NEXT: vpcmpnleud %ymm7, %ymm5, %k2
+; AVX10-256-NEXT: vptestmd %ymm1, %ymm3, %k2 {%k2}
+; AVX10-256-NEXT: vmovdqu32 %ymm1, 32(%rdi) {%k2}
+; AVX10-256-NEXT: vptestmd %ymm0, %ymm2, %k1 {%k1}
+; AVX10-256-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1}
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
;
; X86-LABEL: mask_not_cast:
; X86: ## %bb.0:
@@ -4436,6 +4968,27 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_3:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: ktestb %k1, %k0
+; AVX10-256-NEXT: je LBB74_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB74_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_3:
; X86: ## %bb.0:
; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
@@ -4564,6 +5117,39 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_4:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm4, %ymm4, %k1
+; AVX10-256-NEXT: vptestnmq %ymm5, %ymm5, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: vptestnmq %ymm6, %ymm6, %k2
+; AVX10-256-NEXT: vptestnmq %ymm7, %ymm7, %k3
+; AVX10-256-NEXT: kshiftlb $4, %k3, %k3
+; AVX10-256-NEXT: korb %k3, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: ktestb %k1, %k0
+; AVX10-256-NEXT: je LBB75_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB75_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_4:
; X86: ## %bb.0:
; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
@@ -4690,6 +5276,35 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_5:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: korw %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm4, %ymm4, %k1
+; AVX10-256-NEXT: vptestnmd %ymm5, %ymm5, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm6, %ymm6, %k2
+; AVX10-256-NEXT: vptestnmd %ymm7, %ymm7, %k3
+; AVX10-256-NEXT: kunpckbw %k2, %k3, %k2
+; AVX10-256-NEXT: korw %k2, %k1, %k1
+; AVX10-256-NEXT: ktestw %k1, %k0
+; AVX10-256-NEXT: je LBB76_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB76_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_5:
; X86: ## %bb.0:
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
@@ -4850,6 +5465,35 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_6:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kord %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm4, %ymm4, %k1
+; AVX10-256-NEXT: vptestnmw %ymm5, %ymm5, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm6, %ymm6, %k2
+; AVX10-256-NEXT: vptestnmw %ymm7, %ymm7, %k3
+; AVX10-256-NEXT: kunpckwd %k2, %k3, %k2
+; AVX10-256-NEXT: kord %k2, %k1, %k1
+; AVX10-256-NEXT: ktestd %k1, %k0
+; AVX10-256-NEXT: je LBB77_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB77_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_6:
; X86: ## %bb.0:
; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
@@ -5006,6 +5650,35 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_7:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: vptestnmb %ymm0, %ymm0, %k1
+; AVX10-256-NEXT: vptestnmb %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kord %k2, %k0, %k0
+; AVX10-256-NEXT: vptestnmb %ymm2, %ymm2, %k2
+; AVX10-256-NEXT: kord %k2, %k1, %k1
+; AVX10-256-NEXT: vptestnmb %ymm5, %ymm5, %k2
+; AVX10-256-NEXT: vptestnmb %ymm4, %ymm4, %k3
+; AVX10-256-NEXT: vptestnmb %ymm7, %ymm7, %k4
+; AVX10-256-NEXT: kord %k4, %k2, %k2
+; AVX10-256-NEXT: kandd %k2, %k0, %k0
+; AVX10-256-NEXT: vptestnmb %ymm6, %ymm6, %k2
+; AVX10-256-NEXT: kord %k2, %k3, %k2
+; AVX10-256-NEXT: kandd %k2, %k1, %k1
+; AVX10-256-NEXT: kortestd %k0, %k1
+; AVX10-256-NEXT: je LBB78_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB78_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_7:
; X86: ## %bb.0:
; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
@@ -5104,6 +5777,21 @@ define <64 x i1> @mask64_insert(i32 %a) {
; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: mask64_insert:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: kshiftld $31, %k0, %k0
+; AVX10-256-NEXT: kshiftrd $31, %k0, %k0
+; AVX10-256-NEXT: movl $-131076, %eax ## imm = 0xFFFDFFFC
+; AVX10-256-NEXT: kmovd %eax, %k1
+; AVX10-256-NEXT: kshiftrd $1, %k1, %k1
+; AVX10-256-NEXT: kshiftld $1, %k1, %k1
+; AVX10-256-NEXT: kord %k0, %k1, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: ## ymm1 = mem[0,1,0,1]
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: mask64_insert:
; X86: ## %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
@@ -5240,6 +5928,15 @@ define <1 x i1> @usub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: usub_sat_v1i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %esi, %k0
+; AVX10-256-NEXT: kmovd %edi, %k1
+; AVX10-256-NEXT: kandnw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: usub_sat_v1i1:
; X86: ## %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
@@ -5309,6 +6006,15 @@ define <1 x i1> @ssub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ssub_sat_v1i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %esi, %k0
+; AVX10-256-NEXT: kmovd %edi, %k1
+; AVX10-256-NEXT: kandnw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ssub_sat_v1i1:
; X86: ## %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index e4c62fca5bd57a..a97426f7a20904 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1,36 +1,61 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=EVEX512,AVX512,KNL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=EVEX512,AVX512,AVX512BW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=EVEX512,SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10-256
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test1:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpleps %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xcb,0x02]
+; AVX10-256-NEXT: vcmpleps %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xd2,0x02]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max
}
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
-; CHECK-LABEL: test2:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test2:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test2:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmplepd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0xcb,0x02]
+; AVX10-256-NEXT: vcmplepd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xd2,0x02]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max
}
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test3:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test3:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT: vpcmpeqd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x17]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, ptr %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -38,33 +63,57 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
}
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test4_unsigned:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
-; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test4_unsigned:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
+; EVEX512-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test4_unsigned:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnltud %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1e,0xcb,0x05]
+; AVX10-256-NEXT: vpcmpnltud %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd2,0x05]
+; AVX10-256-NEXT: vpblendmd %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc4]
+; AVX10-256-NEXT: vpblendmd %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xcd]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: test5:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test5:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test5:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcb]
+; AVX10-256-NEXT: vpcmpeqq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xd2]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max
}
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
-; CHECK-LABEL: test6_unsigned:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
-; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test6_unsigned:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
+; EVEX512-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test6_unsigned:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleuq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1e,0xcb,0x06]
+; AVX10-256-NEXT: vpcmpnleuq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd2,0x06]
+; AVX10-256-NEXT: vpblendmq %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc4]
+; AVX10-256-NEXT: vpblendmq %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xcd]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
@@ -88,6 +137,13 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test7:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
+; AVX10-256-NEXT: vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp olt <4 x float> %a, zeroinitializer
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
@@ -112,6 +168,13 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test8:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x57,0xd2]
+; AVX10-256-NEXT: vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp olt <2 x double> %a, zeroinitializer
%c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
ret <2 x double>%c
@@ -132,6 +195,12 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test9:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@@ -152,6 +221,12 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test10:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
@@ -168,6 +243,11 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; SKX: ## %bb.0:
; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test11_unsigned:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@@ -203,6 +283,22 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqq %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc4]
+; AVX10-256-NEXT: vpcmpeqq %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcd]
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT: korb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x45,0xc1]
+; AVX10-256-NEXT: vpcmpeqq %ymm6, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x29,0xce]
+; AVX10-256-NEXT: vpcmpeqq %ymm7, %ymm3, %k2 ## encoding: [0x62,0xf2,0xe5,0x28,0x29,0xd7]
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2 ## encoding: [0xc4,0xe3,0x79,0x32,0xd2,0x04]
+; AVX10-256-NEXT: korb %k2, %k1, %k1 ## encoding: [0xc5,0xf5,0x45,0xca]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
ret i16 %res1
@@ -237,6 +333,19 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12_v32i32:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc4]
+; AVX10-256-NEXT: vpcmpeqd %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0xcd]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: vpcmpeqd %ymm6, %ymm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x28,0x76,0xce]
+; AVX10-256-NEXT: vpcmpeqd %ymm7, %ymm3, %k2 ## encoding: [0x62,0xf1,0x65,0x28,0x76,0xd7]
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1 ## encoding: [0xc5,0xed,0x4b,0xc9]
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <32 x i32> %a, %b
%res1 = bitcast <32 x i1> %res to i32
ret i32 %res1
@@ -291,6 +400,21 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; SKX-NEXT: kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12_v64i16:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqw %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc4]
+; AVX10-256-NEXT: vpcmpeqw %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x75,0xcd]
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
+; AVX10-256-NEXT: vpcmpeqw %ymm6, %ymm2, %k0 ## encoding: [0x62,0xf1,0x6d,0x28,0x75,0xc6]
+; AVX10-256-NEXT: vpcmpeqw %ymm7, %ymm3, %k1 ## encoding: [0x62,0xf1,0x65,0x28,0x75,0xcf]
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: shlq $32, %rax ## encoding: [0x48,0xc1,0xe0,0x20]
+; AVX10-256-NEXT: orq %rcx, %rax ## encoding: [0x48,0x09,0xc8]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <64 x i16> %a, %b
%res1 = bitcast <64 x i1> %res to i64
ret i64 %res1
@@ -310,6 +434,17 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
; SKX-NEXT: vpmovm2d %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test13:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpeqps %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0xc2,0xc2,0x00]
+; AVX10-256-NEXT: vbroadcastss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2 ## EVEX TO VEX Compression ymm2 = [1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: ## encoding: [0xc4,0xe2,0x7d,0x18,0x15,A,A,A,A]
+; AVX10-256-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX10-256-NEXT: vandps %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc2]
+; AVX10-256-NEXT: vcmpeqps %ymm3, %ymm1, %ymm1 ## encoding: [0xc5,0xf4,0xc2,0xcb,0x00]
+; AVX10-256-NEXT: vandps %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x54,0xca]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
{
%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
@@ -317,12 +452,22 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
}
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
-; CHECK-LABEL: test14:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
-; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test14:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
+; EVEX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
+; EVEX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test14:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsubd %ymm2, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xd2]
+; AVX10-256-NEXT: vpsubd %ymm3, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfa,0xdb]
+; AVX10-256-NEXT: vpcmpgtd %ymm1, %ymm3, %k1 ## encoding: [0x62,0xf1,0x65,0x28,0x66,0xc9]
+; AVX10-256-NEXT: vpcmpgtd %ymm0, %ymm2, %k2 ## encoding: [0x62,0xf1,0x6d,0x28,0x66,0xd0]
+; AVX10-256-NEXT: vmovdqa32 %ymm2, %ymm0 {%k2} {z} ## encoding: [0x62,0xf1,0x7d,0xaa,0x6f,0xc2]
+; AVX10-256-NEXT: vmovdqa32 %ymm3, %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0xcb]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
@@ -332,12 +477,22 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
}
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
-; CHECK-LABEL: test15:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
-; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test15:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
+; EVEX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
+; EVEX512-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test15:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsubq %ymm2, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfb,0xd2]
+; AVX10-256-NEXT: vpsubq %ymm3, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfb,0xdb]
+; AVX10-256-NEXT: vpcmpgtq %ymm1, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x37,0xc9]
+; AVX10-256-NEXT: vpcmpgtq %ymm0, %ymm2, %k2 ## encoding: [0x62,0xf2,0xed,0x28,0x37,0xd0]
+; AVX10-256-NEXT: vmovdqa64 %ymm2, %ymm0 {%k2} {z} ## encoding: [0x62,0xf1,0xfd,0xaa,0x6f,0xc2]
+; AVX10-256-NEXT: vmovdqa64 %ymm3, %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0xcb]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
@@ -347,22 +502,38 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
}
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test16:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
-; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test16:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
+; EVEX512-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test16:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnltd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xcb,0x05]
+; AVX10-256-NEXT: vpcmpnltd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd2,0x05]
+; AVX10-256-NEXT: vpblendmd %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc4]
+; AVX10-256-NEXT: vpblendmd %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xcd]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test17:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test17:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test17:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0x4f,0x01]
+; AVX10-256-NEXT: vpcmpgtd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0x17]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, ptr %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -370,11 +541,19 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test18:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test18:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test18:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpled 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0x4f,0x01,0x02]
+; AVX10-256-NEXT: vpcmpled (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0x17,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, ptr %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -382,11 +561,19 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test19:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test19:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test19:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpleud 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1e,0x4f,0x01,0x02]
+; AVX10-256-NEXT: vpcmpleud (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0x17,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, ptr %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -394,12 +581,22 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test20:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
-; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test20:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
+; EVEX512-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test20:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0xcb]
+; AVX10-256-NEXT: vpcmpeqd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xd2]
+; AVX10-256-NEXT: vpcmpeqd %ymm6, %ymm4, %k2 {%k2} ## encoding: [0x62,0xf1,0x5d,0x2a,0x76,0xd6]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpeqd %ymm7, %ymm5, %k1 {%k1} ## encoding: [0x62,0xf1,0x55,0x29,0x76,0xcf]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@@ -408,12 +605,22 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
}
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test21:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
-; CHECK-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test21:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
+; EVEX512-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test21:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpleq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xcb,0x02]
+; AVX10-256-NEXT: vpcmpleq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd2,0x02]
+; AVX10-256-NEXT: vpcmpnltq %ymm6, %ymm4, %k2 {%k2} ## encoding: [0x62,0xf3,0xdd,0x2a,0x1f,0xd6,0x05]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm4, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xdd,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpnltq %ymm7, %ymm5, %k1 {%k1} ## encoding: [0x62,0xf3,0xd5,0x29,0x1f,0xcf,0x05]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm5, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xd5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@@ -422,12 +629,22 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
}
define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test22:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
-; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test22:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
+; EVEX512-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test22:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtq %ymm5, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x37,0xcd]
+; AVX10-256-NEXT: vpcmpgtq %ymm4, %ymm2, %k2 ## encoding: [0x62,0xf2,0xed,0x28,0x37,0xd4]
+; AVX10-256-NEXT: vpcmpgtq (%rdi), %ymm0, %k2 {%k2} ## encoding: [0x62,0xf2,0xfd,0x2a,0x37,0x17]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpgtq 32(%rdi), %ymm1, %k1 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x37,0x4f,0x01]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, ptr %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
@@ -437,12 +654,22 @@ define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1)
}
define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test23:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test23:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test23:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnltd %ymm5, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcd,0x05]
+; AVX10-256-NEXT: vpcmpnltd %ymm4, %ymm2, %k2 ## encoding: [0x62,0xf3,0x6d,0x28,0x1f,0xd4,0x05]
+; AVX10-256-NEXT: vpcmpleud (%rdi), %ymm0, %k2 {%k2} ## encoding: [0x62,0xf3,0x7d,0x2a,0x1e,0x17,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpleud 32(%rdi), %ymm1, %k1 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x1e,0x4f,0x01,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, ptr %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
@@ -452,11 +679,20 @@ define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32>
}
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
-; CHECK-LABEL: test24:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test24:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test24:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpbroadcastq (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x27]
+; AVX10-256-NEXT: vpcmpeqq %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcc]
+; AVX10-256-NEXT: vpcmpeqq %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xd4]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%yb = load i64, ptr %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -466,11 +702,20 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
}
define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test25:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test25:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test25:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpbroadcastd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x27]
+; AVX10-256-NEXT: vpcmpled %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xcc,0x02]
+; AVX10-256-NEXT: vpcmpled %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd4,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%yb = load i32, ptr %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -480,12 +725,23 @@ define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
}
define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test26:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test26:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test26:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpbroadcastd (%rdi), %ymm6 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x37]
+; AVX10-256-NEXT: vpcmpgtd %ymm6, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0xce]
+; AVX10-256-NEXT: vpcmpgtd %ymm6, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xd6]
+; AVX10-256-NEXT: vpcmpnltd %ymm4, %ymm2, %k2 {%k2} ## encoding: [0x62,0xf3,0x6d,0x2a,0x1f,0xd4,0x05]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpnltd %ymm5, %ymm3, %k1 {%k1} ## encoding: [0x62,0xf3,0x65,0x29,0x1f,0xcd,0x05]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, ptr %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
@@ -497,12 +753,23 @@ define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32>
}
define <8 x i64> @test27(<8 x i64> %x, ptr %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test27:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test27:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test27:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpbroadcastq (%rdi), %ymm6 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x37]
+; AVX10-256-NEXT: vpcmpleq %ymm6, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xce,0x02]
+; AVX10-256-NEXT: vpcmpleq %ymm6, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd6,0x02]
+; AVX10-256-NEXT: vpcmpnltq %ymm4, %ymm2, %k2 {%k2} ## encoding: [0x62,0xf3,0xed,0x2a,0x1f,0xd4,0x05]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpnltq %ymm5, %ymm3, %k1 {%k1} ## encoding: [0x62,0xf3,0xe5,0x29,0x1f,0xcd,0x05]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, ptr %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
@@ -530,6 +797,20 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1
; SKX-NEXT: kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
; SKX-NEXT: vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test28:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtq %ymm2, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc2]
+; AVX10-256-NEXT: vpcmpgtq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x37,0xcb]
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT: korb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x45,0xc1]
+; AVX10-256-NEXT: vpcmpgtq %ymm6, %ymm4, %k1 ## encoding: [0x62,0xf2,0xdd,0x28,0x37,0xce]
+; AVX10-256-NEXT: vpcmpgtq %ymm7, %ymm5, %k2 ## encoding: [0x62,0xf2,0xd5,0x28,0x37,0xd7]
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2 ## encoding: [0xc4,0xe3,0x79,0x32,0xd2,0x04]
+; AVX10-256-NEXT: korb %k2, %k1, %k1 ## encoding: [0xc5,0xf5,0x45,0xca]
+; AVX10-256-NEXT: kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
+; AVX10-256-NEXT: vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%x_gt_y = icmp sgt <8 x i64> %x, %y
%x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
%res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
@@ -566,6 +847,19 @@ define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32>
; SKX-NEXT: vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test29:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtd %ymm2, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc2]
+; AVX10-256-NEXT: vpcmpgtd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0xcb]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: vpcmpgtd %ymm6, %ymm4, %k1 ## encoding: [0x62,0xf1,0x5d,0x28,0x66,0xce]
+; AVX10-256-NEXT: vpcmpgtd %ymm7, %ymm5, %k2 ## encoding: [0x62,0xf1,0x55,0x28,0x66,0xd7]
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1 ## encoding: [0xc5,0xed,0x4b,0xc9]
+; AVX10-256-NEXT: kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%x_gt_y = icmp sgt <16 x i32> %x, %y
%x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
%res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
@@ -588,6 +882,12 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test30:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <4 x double> %x, %y
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
@@ -611,6 +911,12 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, ptr %yp) nounwind
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test31:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <2 x double>, ptr %yp, align 4
%mask = fcmp olt <2 x double> %x, %y
@@ -635,6 +941,12 @@ define <2 x double> @test31_commute(<2 x double> %x, <2 x double> %x1, ptr %yp)
; SKX-NEXT: vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test31_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <2 x double>, ptr %yp, align 4
%mask = fcmp olt <2 x double> %y, %x
@@ -658,6 +970,12 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, ptr %yp) nounwind
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test32:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <4 x double>, ptr %yp, align 4
%mask = fcmp ogt <4 x double> %y, %x
@@ -681,6 +999,12 @@ define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, ptr %yp)
; SKX-NEXT: vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test32_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <4 x double>, ptr %yp, align 4
%mask = fcmp ogt <4 x double> %x, %y
@@ -689,11 +1013,19 @@ define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, ptr %yp)
}
define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test33:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test33:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test33:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0x4f,0x01,0x01]
+; AVX10-256-NEXT: vcmpltpd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x17,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <8 x double>, ptr %yp, align 4
%mask = fcmp olt <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
@@ -701,11 +1033,19 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind
}
define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test33_commute:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test33_commute:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test33_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0x4f,0x01,0x0e]
+; AVX10-256-NEXT: vcmpgtpd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x17,0x0e]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <8 x double>, ptr %yp, align 4
%mask = fcmp olt <8 x double> %y, %x
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
@@ -729,6 +1069,12 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, ptr %yp) nounwind {
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test34:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <4 x float>, ptr %yp, align 4
%mask = fcmp olt <4 x float> %x, %y
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
@@ -752,6 +1098,12 @@ define <4 x float> @test34_commute(<4 x float> %x, <4 x float> %x1, ptr %yp) nou
; SKX-NEXT: vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test34_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <4 x float>, ptr %yp, align 4
%mask = fcmp olt <4 x float> %y, %x
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
@@ -774,6 +1126,12 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, ptr %yp) nounwind {
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test35:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <8 x float>, ptr %yp, align 4
%mask = fcmp ogt <8 x float> %y, %x
@@ -797,6 +1155,12 @@ define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, ptr %yp) nou
; SKX-NEXT: vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test35_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <8 x float>, ptr %yp, align 4
%mask = fcmp ogt <8 x float> %x, %y
@@ -805,11 +1169,19 @@ define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, ptr %yp) nou
}
define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test36:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test36:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test36:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0x4f,0x01,0x01]
+; AVX10-256-NEXT: vcmpltps (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x17,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x float>, ptr %yp, align 4
%mask = fcmp olt <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
@@ -817,11 +1189,19 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind
}
define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test36_commute:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test36_commute:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test36_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0x4f,0x01,0x0e]
+; AVX10-256-NEXT: vcmpgtps (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x17,0x0e]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x float>, ptr %yp, align 4
%mask = fcmp olt <16 x float> %y, %x
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
@@ -829,11 +1209,20 @@ define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp)
}
define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test37:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test37:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test37:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT: vcmpltpd %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0xcc,0x01]
+; AVX10-256-NEXT: vcmpltpd %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xd4,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@@ -845,11 +1234,20 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwin
}
define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test37_commute:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test37_commute:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test37_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT: vcmpltpd %ymm1, %ymm4, %k1 ## encoding: [0x62,0xf1,0xdd,0x28,0xc2,0xc9,0x01]
+; AVX10-256-NEXT: vcmpltpd %ymm0, %ymm4, %k2 ## encoding: [0x62,0xf1,0xdd,0x28,0xc2,0xd0,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@@ -875,6 +1273,12 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, ptr %ptr) nounwin
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test38:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
@@ -900,6 +1304,12 @@ define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, ptr %ptr)
; SKX-NEXT: vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test38_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
@@ -926,6 +1336,12 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, ptr %ptr) nounwin
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test39:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
@@ -952,6 +1368,12 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, ptr %ptr)
; SKX-NEXT: vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test39_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
@@ -964,11 +1386,20 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, ptr %ptr)
define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test40:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test40:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test40:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vbroadcastss (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x27]
+; AVX10-256-NEXT: vcmpltps %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xcc,0x01]
+; AVX10-256-NEXT: vcmpltps %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xd4,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <16 x float> undef, float %a, i32 0
@@ -980,11 +1411,20 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, ptr %ptr) noun
}
define <16 x float> @test40_commute(<16 x float> %x, <16 x float> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test40_commute:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test40_commute:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test40_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vbroadcastss (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x27]
+; AVX10-256-NEXT: vcmpltps %ymm1, %ymm4, %k1 ## encoding: [0x62,0xf1,0x5c,0x28,0xc2,0xc9,0x01]
+; AVX10-256-NEXT: vcmpltps %ymm0, %ymm4, %k2 ## encoding: [0x62,0xf1,0x5c,0x28,0xc2,0xd0,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <16 x float> undef, float %a, i32 0
@@ -1010,6 +1450,12 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, ptr %ptr) nounwin
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test41:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <8 x float> undef, float %a, i32 0
@@ -1035,6 +1481,12 @@ define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, ptr %ptr)
; SKX-NEXT: vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test41_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <8 x float> undef, float %a, i32 0
@@ -1061,6 +1513,12 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, ptr %ptr) nounwin
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test42:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
@@ -1087,6 +1545,12 @@ define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, ptr %ptr)
; SKX-NEXT: vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test42_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
@@ -1122,6 +1586,18 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, ptr %ptr,<8 x i1>
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test43:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0x71,0xf4,0x0f]
+; AVX10-256-NEXT: vpmovw2m %xmm4, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xcc]
+; AVX10-256-NEXT: vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT: vcmpltpd %ymm4, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc2,0xd4,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT: vcmpltpd %ymm4, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0xc2,0xcc,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@@ -1158,6 +1634,18 @@ define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr,
; SKX-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test43_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0x71,0xf4,0x0f]
+; AVX10-256-NEXT: vpmovw2m %xmm4, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xcc]
+; AVX10-256-NEXT: vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT: vcmpltpd %ymm0, %ymm4, %k2 {%k1} ## encoding: [0x62,0xf1,0xdd,0x29,0xc2,0xd0,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT: vcmpltpd %ymm1, %ymm4, %k1 {%k1} ## encoding: [0x62,0xf1,0xdd,0x29,0xc2,0xc9,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@@ -1181,6 +1669,12 @@ define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; SKX-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test44:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <4 x i16> %x, %y
%1 = sext <4 x i1> %mask to <4 x i32>
ret <4 x i32> %1
@@ -1202,6 +1696,13 @@ define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test45:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX10-256-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <2 x i16> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
ret <2 x i64> %1
@@ -1223,6 +1724,13 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test46:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX10-256-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <2 x float> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
ret <2 x i64> %1
@@ -1254,6 +1762,15 @@ define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
; SKX-NEXT: vpblendmb %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x66,0xc1]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test47:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc0]
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x76,0x28,0x27,0xc9]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k1 ## encoding: [0xc5,0xf5,0x4b,0xc8]
+; AVX10-256-NEXT: vpblendmb %xmm2, %xmm3, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x65,0x09,0x66,0xc2]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <16 x i32> %a, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %c
ret <16 x i8> %res
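
When each half is an 8-bit mask, as in test47 above and test48 below, kunpckbw does the reassembly: it concatenates the low byte of each source k-register into one 16-bit mask (kunpckwd does the same for two 16-bit halves). A reduced, illustrative form of that idiom, assuming the same AVX10-256 flags as these tests; the function name is made up:

define i16 @concat_v8i1(<8 x i32> %x, <8 x i32> %y) {
  %a = icmp eq <8 x i32> %x, zeroinitializer
  %b = icmp eq <8 x i32> %y, zeroinitializer
  ; concatenating the two v8i1 halves should lower to kunpckbw
  %c = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %r = bitcast <16 x i1> %c to i16
  ret i16 %r
}
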
@@ -1282,6 +1799,14 @@ define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) {
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
; SKX-NEXT: vpblendmw %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x66,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test48:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc0]
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x76,0x28,0x27,0xc9]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k1 ## encoding: [0xc5,0xf5,0x4b,0xc8]
+; AVX10-256-NEXT: vpblendmw %ymm2, %ymm3, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x66,0xc2]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <16 x i32> %a, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %c
ret <16 x i16> %res
@@ -1313,6 +1838,16 @@ define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) {
; SKX-NEXT: vpblendmw %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x66,0xc1]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test49:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT: korb %k1, %k0, %k1 ## encoding: [0xc5,0xfd,0x45,0xc9]
+; AVX10-256-NEXT: vpblendmw %xmm2, %xmm3, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xe5,0x09,0x66,0xc2]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <8 x i64> %a, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %c
ret <8 x i16> %res
@@ -1342,6 +1877,16 @@ define i16 @pcmpeq_mem_1(<16 x i32> %a, ptr %b) {
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: pcmpeq_mem_1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x07]
+; AVX10-256-NEXT: vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%load = load <16 x i32>, ptr %b
%cmp = icmp eq <16 x i32> %a, %load
%cast = bitcast <16 x i1> %cmp to i16
@@ -1374,6 +1919,16 @@ define i16 @pcmpeq_mem_2(<16 x i32> %a, ptr %b) {
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: pcmpeq_mem_2:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x07]
+; AVX10-256-NEXT: vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%load = load <16 x i32>, ptr %b
%cmp = icmp eq <16 x i32> %load, %a
%cast = bitcast <16 x i1> %cmp to i16
@@ -1394,6 +1949,11 @@ define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
; SKX: ## %bb.0:
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: PR41066:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%t1 = fcmp ogt <2 x double> %x, %y
%t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
ret <2 x i64> %t2
@@ -1421,6 +1981,16 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32>
; SKX-NEXT: vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: zext_bool_logic:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
+; AVX10-256-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX10-256-NEXT: vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = icmp eq <4 x i64> %cond1, zeroinitializer
%b = icmp eq <4 x i64> %cond2, zeroinitializer
%c = or <4 x i1> %a, %b
@@ -1531,6 +2101,17 @@ define void @half_vec_compare(ptr %x, ptr %y) {
; SKX-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; SKX-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: half_vec_compare:
+; AVX10-256: ## %bb.0: ## %entry
+; AVX10-256-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
+; AVX10-256-NEXT: ## xmm0 = mem[0],zero,zero,zero
+; AVX10-256-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
+; AVX10-256-NEXT: vcmpneqph %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc9,0x04]
+; AVX10-256-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x05,A,A,A,A]
+; AVX10-256-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX10-256-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
entry:
%0 = load <2 x half>, ptr %x
%1 = fcmp une <2 x half> %0, zeroinitializer
@@ -1571,6 +2152,16 @@ define <8 x i64> @cmp_swap_bug(ptr %x, <8 x i64> %y, <8 x i64> %z) {
; SKX-NEXT: vpmovb2m %xmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xca]
; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: cmp_swap_bug:
+; AVX10-256: ## %bb.0: ## %entry
+; AVX10-256-NEXT: vmovdqa (%rdi), %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x27]
+; AVX10-256-NEXT: vpmovwb %xmm4, %xmm4 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xe4]
+; AVX10-256-NEXT: vpmovb2m %xmm4, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xcc]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x64,0xc0]
+; AVX10-256-NEXT: kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
entry:
%0 = load <16 x i8>, ptr %x
%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1594,6 +2185,12 @@ define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; SKX-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: narrow_cmp_select_reverse:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
+; AVX10-256-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <2 x i64> %x, zeroinitializer
%res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y
ret <2 x i32> %res
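
The avx512bw-mask-op.ll hunks below show the core legalization this patch relies on: without EVEX512 there are no quadword opmask instructions, so an i64 mask value is handled as two i32 halves (paired kmovd/knotd/kord instead of a single kmovq/knotq/korq). A minimal standalone reproducer, assuming the -mattr=+avx10.1-256 string from the RUN lines below; the function name is illustrative and the exact schedule may differ:

; llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256
; Expect two 256-bit compares whose kmovd results are glued together
; with shlq $32 + orq, rather than a single zmm compare + kmovq.
define i64 @cmp_v64i8(<64 x i8> %a, <64 x i8> %b) {
  %c = icmp eq <64 x i8> %a, %b
  %r = bitcast <64 x i1> %c to i64
  ret i64 %r
}
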
diff --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
index 2a262644836135..3c91c2948fc909 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=CHECK,AVX10-256
define i32 @mask32(i32 %x) {
; CHECK-LABEL: mask32:
@@ -54,12 +55,22 @@ define void @mask32_mem(ptr %ptr) {
}
define void @mask64_mem(ptr %ptr) {
-; CHECK-LABEL: mask64_mem:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq (%rdi), %k0
-; CHECK-NEXT: knotq %k0, %k0
-; CHECK-NEXT: kmovq %k0, (%rdi)
-; CHECK-NEXT: retq
+; SKX-LABEL: mask64_mem:
+; SKX: ## %bb.0:
+; SKX-NEXT: kmovq (%rdi), %k0
+; SKX-NEXT: knotq %k0, %k0
+; SKX-NEXT: kmovq %k0, (%rdi)
+; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: mask64_mem:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd (%rdi), %k0
+; AVX10-256-NEXT: kmovd 4(%rdi), %k1
+; AVX10-256-NEXT: knotd %k1, %k1
+; AVX10-256-NEXT: knotd %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, (%rdi)
+; AVX10-256-NEXT: kmovd %k1, 4(%rdi)
+; AVX10-256-NEXT: retq
%x = load i64, ptr %ptr, align 4
%m0 = bitcast i64 %x to <64 x i1>
%m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -123,13 +134,27 @@ define i64 @mand64(i64 %x, i64 %y) {
}
define i64 @mand64_mem(ptr %x, ptr %y) {
-; CHECK-LABEL: mand64_mem:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq (%rdi), %k0
-; CHECK-NEXT: kmovq (%rsi), %k1
-; CHECK-NEXT: korq %k1, %k0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; SKX-LABEL: mand64_mem:
+; SKX: ## %bb.0:
+; SKX-NEXT: kmovq (%rdi), %k0
+; SKX-NEXT: kmovq (%rsi), %k1
+; SKX-NEXT: korq %k1, %k0, %k0
+; SKX-NEXT: kmovq %k0, %rax
+; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: mand64_mem:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd (%rdi), %k0
+; AVX10-256-NEXT: kmovd 4(%rdi), %k1
+; AVX10-256-NEXT: kmovd (%rsi), %k2
+; AVX10-256-NEXT: kord %k2, %k0, %k0
+; AVX10-256-NEXT: kmovd 4(%rsi), %k2
+; AVX10-256-NEXT: kord %k2, %k1, %k1
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: kmovd %k1, %eax
+; AVX10-256-NEXT: shlq $32, %rax
+; AVX10-256-NEXT: orq %rcx, %rax
+; AVX10-256-NEXT: retq
%ma = load <64 x i1>, ptr %x
%mb = load <64 x i1>, ptr %y
%mc = and <64 x i1> %ma, %mb
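
The shlq $32 + orq tail in the AVX10-256 checks above is ordinary half recombination: the backend materializes (zext(hi) << 32) | zext(lo) to rebuild the i64 mask value from the two doubleword k-registers. The same identity in IR terms, for illustration only:

define i64 @concat_i32_halves(i32 %lo, i32 %hi) {
  %l = zext i32 %lo to i64
  %h = zext i32 %hi to i64
  %hs = shl i64 %h, 32
  %r = or i64 %hs, %l    ; the same glue the checks above emit
  ret i64 %r
}
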
@@ -229,12 +254,22 @@ define <32 x i1> @bitcast_f32_to_v32i1(float %x) {
}
define <64 x i1> @bitcast_f64_to_v64i1(double %x) {
-; CHECK-LABEL: bitcast_f64_to_v64i1:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovq %xmm0, %rax
-; CHECK-NEXT: kmovq %rax, %k0
-; CHECK-NEXT: vpmovm2b %k0, %zmm0
-; CHECK-NEXT: retq
+; SKX-LABEL: bitcast_f64_to_v64i1:
+; SKX: ## %bb.0:
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: kmovq %rax, %k0
+; SKX-NEXT: vpmovm2b %k0, %zmm0
+; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: bitcast_f64_to_v64i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vmovq %xmm0, %rax
+; AVX10-256-NEXT: kmovd %eax, %k0
+; AVX10-256-NEXT: shrq $32, %rax
+; AVX10-256-NEXT: kmovd %eax, %k1
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT: retq
%a = bitcast double %x to <64 x i1>
ret <64 x i1> %a
}
@@ -252,14 +287,28 @@ define float @bitcast_v32i1_to_f32(<32 x i1> %x) {
}
define double @bitcast_v64i1_to_f64(<64 x i1> %x) {
-; CHECK-LABEL: bitcast_v64i1_to_f64:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0
-; CHECK-NEXT: vpmovb2m %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vmovq %rax, %xmm0
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; SKX-LABEL: bitcast_v64i1_to_f64:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
+; SKX-NEXT: vpmovb2m %zmm0, %k0
+; SKX-NEXT: kmovq %k0, %rax
+; SKX-NEXT: vmovq %rax, %xmm0
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: bitcast_v64i1_to_f64:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %ymm1, %ymm1
+; AVX10-256-NEXT: vpmovb2m %ymm1, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: shlq $32, %rax
+; AVX10-256-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: orq %rax, %rcx
+; AVX10-256-NEXT: vmovq %rcx, %xmm0
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = bitcast <64 x i1> %x to double
ret double %a
}
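
The kshift.ll changes below lean on one recurring idiom: an <8 x i1> predicate computed as two 4-bit halves is reassembled in a k-register as (hi << 4) | lo via kshiftlb $4 + korb. A reduced form of that idiom, under the same RUN flags as below; hypothetical test name:

define i8 @concat_v4i1(<4 x i64> %x, <4 x i64> %y) {
  %a = icmp eq <4 x i64> %x, zeroinitializer
  %b = icmp eq <4 x i64> %y, zeroinitializer
  ; concatenating the two v4i1 halves should lower to kshiftlb/korb
  %c = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %r = bitcast <8 x i1> %c to i8
  ret i8 %r
}
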
diff --git a/llvm/test/CodeGen/X86/kshift.ll b/llvm/test/CodeGen/X86/kshift.ll
index 0acf82f5a144a2..16444adb1dc568 100644
--- a/llvm/test/CodeGen/X86/kshift.ll
+++ b/llvm/test/CodeGen/X86/kshift.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq,avx512bw | FileCheck %s --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx10.1-256 | FileCheck %s --check-prefix=AVX10-256
define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
; KNL-LABEL: kshiftl_v8i1_1:
@@ -22,6 +23,23 @@ define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -50,6 +68,21 @@ define i16 @kshiftl_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v16i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kshiftlw $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: kandw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
%c = icmp eq <16 x i32> %y, zeroinitializer
@@ -96,6 +129,20 @@ define i32 @kshiftl_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v32i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kshiftld $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kandd %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <32 x i16> %x, zeroinitializer
%b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
%c = icmp eq <32 x i16> %y, zeroinitializer
@@ -166,6 +213,25 @@ define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v64i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: vptestnmb %ymm0, %ymm0, %k1
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm1
+; AVX10-256-NEXT: vmovdqa {{.*#+}} ymm4 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX10-256-NEXT: vpermi2b %ymm0, %ymm1, %ymm4
+; AVX10-256-NEXT: vpmovb2m %ymm4, %k2
+; AVX10-256-NEXT: kshiftld $1, %k1, %k1
+; AVX10-256-NEXT: vptestnmb %ymm3, %ymm3, %k0 {%k2}
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: shlq $32, %rcx
+; AVX10-256-NEXT: vptestnmb %ymm2, %ymm2, %k0 {%k1}
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: orq %rcx, %rax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <64 x i8> %x, zeroinitializer
%b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 64, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
%c = icmp eq <64 x i8> %y, zeroinitializer
@@ -194,6 +260,20 @@ define i8 @kshiftl_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_7:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kshiftlb $7, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> zeroinitializer, <8 x i1> %a, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -222,6 +302,19 @@ define i16 @kshiftl_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v16i1_15:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kshiftlw $15, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: kandw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
%c = icmp eq <16 x i32> %y, zeroinitializer
@@ -255,6 +348,18 @@ define i32 @kshiftl_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v32i1_31:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kshiftld $31, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kandd %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <32 x i16> %x, zeroinitializer
%b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
%c = icmp eq <32 x i16> %y, zeroinitializer
@@ -291,6 +396,16 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v64i1_63:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kshiftld $31, %k0, %k1
+; AVX10-256-NEXT: vptestnmb %ymm3, %ymm3, %k0 {%k1}
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: shlq $32, %rax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <64 x i8> %x, zeroinitializer
%b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
%c = icmp eq <64 x i8> %y, zeroinitializer
@@ -320,6 +435,23 @@ define i8 @kshiftr_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -348,6 +480,21 @@ define i16 @kshiftr_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v16i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kshiftrw $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: kandw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
%c = icmp eq <16 x i32> %y, zeroinitializer
@@ -394,6 +541,20 @@ define i32 @kshiftr_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v32i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kshiftrd $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kandd %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <32 x i16> %x, zeroinitializer
%b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
%c = icmp eq <32 x i16> %y, zeroinitializer
@@ -464,6 +625,25 @@ define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v64i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmb %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm1
+; AVX10-256-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32]
+; AVX10-256-NEXT: vpermi2b %ymm0, %ymm1, %ymm4
+; AVX10-256-NEXT: vpmovb2m %ymm4, %k2
+; AVX10-256-NEXT: kshiftrd $1, %k1, %k1
+; AVX10-256-NEXT: vptestnmb %ymm2, %ymm2, %k0 {%k2}
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: vptestnmb %ymm3, %ymm3, %k0 {%k1}
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: shlq $32, %rax
+; AVX10-256-NEXT: orq %rcx, %rax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <64 x i8> %x, zeroinitializer
%b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
%c = icmp eq <64 x i8> %y, zeroinitializer
@@ -492,6 +672,23 @@ define i8 @kshiftr_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_7:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -520,6 +717,20 @@ define i16 @kshiftr_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v16i1_15:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: kshiftlw $8, %k0, %k0
+; AVX10-256-NEXT: kshiftrw $15, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: kandw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
%c = icmp eq <16 x i32> %y, zeroinitializer
@@ -552,6 +763,19 @@ define i32 @kshiftr_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v32i1_31:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: kshiftld $16, %k0, %k0
+; AVX10-256-NEXT: kshiftrd $31, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kandd %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <32 x i16> %x, zeroinitializer
%b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 63, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
%c = icmp eq <32 x i16> %y, zeroinitializer
@@ -586,6 +810,15 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v64i1_63:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: kshiftrd $31, %k0, %k1
+; AVX10-256-NEXT: vptestnmb %ymm2, %ymm2, %k0 {%k1}
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <64 x i8> %x, zeroinitializer
%b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 127, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
%c = icmp eq <64 x i8> %y, zeroinitializer
@@ -614,6 +847,23 @@ define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_zu123u56:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 5, i32 6>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -642,6 +892,23 @@ define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_u0123456:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -671,6 +938,23 @@ define i8 @kshiftr_v8i1_1u3u567z(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_1u3u567z:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 6, i32 7, i32 8>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -699,6 +983,23 @@ define i8 @kshiftr_v8i1_234567uu(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_234567uu:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $2, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10>
%c = icmp eq <8 x i64> %y, zeroinitializer
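
Several hunks above also fold the final predicate AND into the second compare itself: vptestnmb %ymm3, %ymm3, %k0 {%k1} both tests ymm3 and zero-masks the result with k1, saving a separate kand. A sketch of the kind of IR that can take this path; whether the fold actually fires is at the backend's discretion:

define i8 @and_of_two_tests(<8 x i32> %x, <8 x i32> %y) {
  %a = icmp eq <8 x i32> %x, zeroinitializer
  %b = icmp eq <8 x i32> %y, zeroinitializer
  %c = and <8 x i1> %a, %b   ; may fold into a masked vptestnmd
  %r = bitcast <8 x i1> %c to i8
  ret i8 %r
}
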
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index f26bbb7e5c2bda..53f99431598bf1 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -3,8 +3,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,EVEX512,KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,EVEX512,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX,AVX512,AVX10-256
define i1 @allones_v16i8_sign(<16 x i8> %arg) {
; SSE-LABEL: allones_v16i8_sign:
@@ -176,6 +177,15 @@ define i1 @allones_v64i8_sign(<64 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v64i8_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <64 x i8> %arg, zeroinitializer
%tmp1 = bitcast <64 x i1> %tmp to i64
%tmp2 = icmp eq i64 %tmp1, -1
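
kortest sets ZF when the OR of its two operands is all zeros and CF when it is all ones, so sete/setb after a kortest implement the allzeros/allones reductions with no extra compare. A single-register form of the pattern, for reference only:

define i1 @allones_v32i8(<32 x i8> %v) {
  %s = icmp slt <32 x i8> %v, zeroinitializer  ; sign-bit test, vpmovb2m
  %m = bitcast <32 x i1> %s to i32
  %r = icmp eq i32 %m, -1                      ; kortestd + setb
  ret i1 %r
}
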
@@ -232,6 +242,15 @@ define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <64 x i8> %arg, zeroinitializer
%tmp1 = bitcast <64 x i1> %tmp to i64
%tmp2 = icmp eq i64 %tmp1, 0
@@ -273,6 +292,13 @@ define i1 @allones_v8i16_sign(<8 x i16> %arg) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i16> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, -1
@@ -311,6 +337,13 @@ define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i16> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, 0
@@ -364,6 +397,14 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <16 x i16> %arg, zeroinitializer
%tmp1 = bitcast <16 x i1> %tmp to i16
%tmp2 = icmp eq i16 %tmp1, -1
@@ -415,6 +456,14 @@ define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <16 x i16> %arg, zeroinitializer
%tmp1 = bitcast <16 x i1> %tmp to i16
%tmp2 = icmp eq i16 %tmp1, 0
@@ -479,6 +528,16 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovw2m %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <32 x i16> %arg, zeroinitializer
%tmp1 = bitcast <32 x i1> %tmp to i32
%tmp2 = icmp eq i32 %tmp1, -1
@@ -539,6 +598,16 @@ define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovw2m %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <32 x i16> %arg, zeroinitializer
%tmp1 = bitcast <32 x i1> %tmp to i32
%tmp2 = icmp eq i32 %tmp1, 0
@@ -697,6 +766,16 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i32_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovd2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovd2m %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <16 x i32> %arg, zeroinitializer
%tmp1 = bitcast <16 x i1> %tmp to i16
%tmp2 = icmp eq i16 %tmp1, -1
@@ -755,6 +834,16 @@ define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovd2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovd2m %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <16 x i32> %arg, zeroinitializer
%tmp1 = bitcast <16 x i1> %tmp to i16
%tmp2 = icmp eq i16 %tmp1, 0
@@ -872,6 +961,17 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i64_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovq2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovq2m %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i64> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, -1
@@ -926,6 +1026,17 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovq2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovq2m %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i64> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, 0
@@ -963,6 +1074,13 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1092,13 +1210,21 @@ define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v8i64_not:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v8i64_not:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: setne %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_not:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vptest %ymm0, %ymm0
+; AVX10-256-NEXT: setne %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%1 = icmp eq <8 x i64> %a0, zeroinitializer
%2 = bitcast <8 x i1> %1 to i8
%3 = icmp ne i8 %2, -1
@@ -1138,6 +1264,13 @@ define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1191,6 +1324,14 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1298,6 +1439,16 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v64i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastb {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: vptestmb %ymm2, %ymm0, %k1
+; AVX10-256-NEXT: vptestmb %ymm2, %ymm1, %k0 {%k1}
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -1343,13 +1494,22 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v64i8_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v64i8_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -1394,6 +1554,13 @@ define i1 @allones_v8i16_and1(<8 x i16> %arg) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1434,6 +1601,13 @@ define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1493,6 +1667,14 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1565,6 +1747,17 @@ define i1 @allones_v32i16_and1(<32 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: vptestmw %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmw %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1610,13 +1803,22 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v32i16_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v32i16_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1704,6 +1906,14 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) {
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $15, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -1744,6 +1954,13 @@ define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v4i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -1800,6 +2017,14 @@ define i1 @allones_v8i32_and1(<8 x i32> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1898,13 +2123,24 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allones_v16i32_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allones_v16i32_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: setb %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: vptestmd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1950,13 +2186,22 @@ define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v16i32_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v16i32_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1999,6 +2244,14 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) {
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v2i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <2 x i64> %arg, <i64 1, i64 1>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -2039,6 +2292,13 @@ define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v2i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <2 x i64> %arg, <i64 1, i64 1>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -2095,6 +2355,15 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $15, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -2208,6 +2477,18 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
+; AVX10-256-NEXT: vptestmq %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmq %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2253,14 +2534,23 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v8i64_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
-; AVX512-NEXT: vptestmd %zmm1, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v8i64_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
+; EVEX512-NEXT: vptestmd %zmm1, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2299,6 +2589,13 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2339,6 +2636,13 @@ define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [289360691352306692,289360691352306692]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2392,6 +2696,14 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2499,6 +2811,16 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v64i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastb {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT: vptestmb %ymm2, %ymm0, %k1
+; AVX10-256-NEXT: vptestmb %ymm2, %ymm1, %k0 {%k1}
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -2544,13 +2866,22 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v64i8_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v64i8_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -2595,6 +2926,13 @@ define i1 @allones_v8i16_and4(<8 x i16> %arg) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2635,6 +2973,13 @@ define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1125917086973956,1125917086973956]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2694,6 +3039,14 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2766,6 +3119,17 @@ define i1 @allones_v32i16_and4(<32 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastw {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT: vptestmw %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmw %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2811,13 +3175,22 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v32i16_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v32i16_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2905,6 +3278,14 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) {
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $15, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -2945,6 +3326,13 @@ define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v4i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17179869188,17179869188]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -3001,6 +3389,14 @@ define i1 @allones_v8i32_and4(<8 x i32> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3099,13 +3495,24 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allones_v16i32_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allones_v16i32_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: setb %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT: vptestmd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -3151,13 +3558,22 @@ define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v16i32_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v16i32_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -3200,6 +3616,14 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) {
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v2i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <2 x i64> %arg, <i64 4, i64 4>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -3240,6 +3664,13 @@ define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v2i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <2 x i64> %arg, <i64 4, i64 4>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -3296,6 +3727,15 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $15, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -3409,6 +3849,18 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4,4,4,4]
+; AVX10-256-NEXT: vptestmq %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmq %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3454,14 +3906,23 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v8i64_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4]
-; AVX512-NEXT: vptestmd %zmm1, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v8i64_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4]
+; EVEX512-NEXT: vptestmd %zmm1, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3510,6 +3971,15 @@ define i1 @allzeros_v8f32_nnan(<8 x float> %a0) {
; SKX-NEXT: setne %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8f32_nnan:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX10-256-NEXT: vcmpneqps %ymm1, %ymm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setne %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%1 = fcmp nnan une <8 x float> %a0, zeroinitializer
%2 = bitcast <8 x i1> %1 to i8
%3 = icmp ne i8 %2, 0
@@ -3709,6 +4179,20 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SKX-NEXT: andb %cl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v16i8:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: kshiftrw $15, %k0, %k1
+; AVX10-256-NEXT: kmovd %k1, %ecx
+; AVX10-256-NEXT: kshiftrw $8, %k0, %k1
+; AVX10-256-NEXT: kmovd %k1, %edx
+; AVX10-256-NEXT: kshiftrw $3, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: xorb %dl, %al
+; AVX10-256-NEXT: andb %cl, %al
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
%cmp = icmp eq <16 x i8> %x, %y
%e1 = extractelement <16 x i1> %cmp, i32 3
%e2 = extractelement <16 x i1> %cmp, i32 8
@@ -3758,6 +4242,15 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SKX-NEXT: testb $-109, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v8i16:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: knotb %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: testb $-109, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%cmp = icmp sgt <8 x i16> %x, %y
%e1 = extractelement <8 x i1> %cmp, i32 0
%e2 = extractelement <8 x i1> %cmp, i32 1
@@ -3819,6 +4312,17 @@ define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SKX-NEXT: xorb %cl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v4i32:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kshiftrb $3, %k0, %k1
+; AVX10-256-NEXT: kmovd %k1, %ecx
+; AVX10-256-NEXT: kshiftrb $2, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: xorb %cl, %al
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
%cmp = icmp slt <4 x i32> %x, %y
%e1 = extractelement <4 x i1> %cmp, i32 2
%e2 = extractelement <4 x i1> %cmp, i32 3
@@ -3870,6 +4374,14 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_and_v2i64:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%cmp = icmp ne <2 x i64> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
@@ -3946,6 +4458,14 @@ define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
; SKX-NEXT: testb $14, %al
; SKX-NEXT: setne %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v4f32:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: testb $14, %al
+; AVX10-256-NEXT: setne %al
+; AVX10-256-NEXT: retq
%cmp = fcmp ueq <4 x float> %x, %y
%e1 = extractelement <4 x i1> %cmp, i32 1
%e2 = extractelement <4 x i1> %cmp, i32 2
@@ -3991,6 +4511,14 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_and_v2f64:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%cmp = fcmp oge <2 x double> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4031,6 +4559,13 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setne %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_or_v2f64:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setne %al
+; AVX10-256-NEXT: retq
%cmp = fcmp oge <2 x double> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4074,6 +4609,16 @@ define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) {
; SKX-NEXT: andl $15, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v16i8_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $15, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi), %eax
+; AVX10-256-NEXT: retq
%cmp = icmp eq <16 x i8> %x, %y
%val = extractelement <16 x i1> %cmp, i32 %z
ret i1 %val
@@ -4121,6 +4666,16 @@ define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) {
; SKX-NEXT: andl $7, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v8i16_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $7, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,2), %eax
+; AVX10-256-NEXT: retq
%cmp = icmp sgt <8 x i16> %x, %y
%val = extractelement <8 x i1> %cmp, i32 %z
ret i1 %val
@@ -4165,6 +4720,16 @@ define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) {
; SKX-NEXT: andl $3, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v4i32_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $3, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,4), %eax
+; AVX10-256-NEXT: retq
%cmp = icmp slt <4 x i32> %x, %y
%val = extractelement <4 x i1> %cmp, i32 %z
ret i1 %val
@@ -4222,6 +4787,16 @@ define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) {
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v2i64_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $1, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,8), %eax
+; AVX10-256-NEXT: retq
%cmp = icmp ne <2 x i64> %x, %y
%val = extractelement <2 x i1> %cmp, i32 %z
ret i1 %val
@@ -4269,6 +4844,16 @@ define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) {
; SKX-NEXT: andl $3, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v4f32_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $3, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,4), %eax
+; AVX10-256-NEXT: retq
%cmp = fcmp ueq <4 x float> %x, %y
%val = extractelement <4 x i1> %cmp, i32 %z
ret i1 %val
@@ -4313,6 +4898,16 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v2f64_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $1, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,8), %eax
+; AVX10-256-NEXT: retq
%cmp = fcmp oge <2 x double> %x, %y
%val = extractelement <2 x i1> %cmp, i32 %z
ret i1 %val
@@ -4371,6 +4966,18 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
; SKX-NEXT: testb $1, %cl
; SKX-NEXT: cmovel %edx, %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: PR39665_c_ray:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmpltpd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: testb $2, %cl
+; AVX10-256-NEXT: movl $42, %eax
+; AVX10-256-NEXT: movl $99, %edx
+; AVX10-256-NEXT: cmovel %edx, %eax
+; AVX10-256-NEXT: testb $1, %cl
+; AVX10-256-NEXT: cmovel %edx, %eax
+; AVX10-256-NEXT: retq
%cmp = fcmp ogt <2 x double> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4423,6 +5030,16 @@ define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) {
; SKX-NEXT: movl $99, %eax
; SKX-NEXT: cmovel %ecx, %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: PR39665_c_ray_opt:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmpltpd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: movl $42, %ecx
+; AVX10-256-NEXT: movl $99, %eax
+; AVX10-256-NEXT: cmovel %ecx, %eax
+; AVX10-256-NEXT: retq
%cmp = fcmp ogt <2 x double> %x, %y
%shift = shufflevector <2 x i1> %cmp, <2 x i1> poison, <2 x i32> <i32 1, i32 undef>
%1 = and <2 x i1> %cmp, %shift
@@ -4551,6 +5168,25 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
; SKX-NEXT: movw $0, 0
; SKX-NEXT: xorl %eax, %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: pr67287:
+; AVX10-256: # %bb.0: # %entry
+; AVX10-256-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX10-256-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX10-256-NEXT: vptestnmq %xmm0, %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: jne .LBB97_2
+; AVX10-256-NEXT: # %bb.1: # %entry
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: testb $1, %al
+; AVX10-256-NEXT: jne .LBB97_2
+; AVX10-256-NEXT: # %bb.3: # %middle.block
+; AVX10-256-NEXT: xorl %eax, %eax
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: .LBB97_2:
+; AVX10-256-NEXT: movw $0, 0
+; AVX10-256-NEXT: xorl %eax, %eax
+; AVX10-256-NEXT: retq
entry:
%0 = and <2 x i64> %broadcast.splatinsert25, <i64 4294967295, i64 4294967295>
%1 = icmp eq <2 x i64> %0, zeroinitializer
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index d025fe955be515..d7183cf47eb13a 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -939,7 +939,8 @@ std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
}
break;
case 64:
- if (STI.getFeatureBits()[X86::FeatureBWI]) {
+ if (STI.getFeatureBits()[X86::FeatureBWI] &&
+ STI.getFeatureBits()[X86::FeatureEVEX512]) {
ConstantInliner CI(Value);
return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVQkm);
}
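
The llvm-exegesis hunk above mirrors the backend fix: KMOVQ is a quadword opmask move, and per the commit message quadword opmask instructions are only available when 512-bit vector lengths are supported, so checking FeatureBWI alone is no longer sufficient once AVX10.1-256 (BWI without EVEX512) exists. As a hedged illustration only — the helper below is hypothetical and not part of the patch, though the feature-bit accesses match those in the hunk — the new guard reduces to:

```cpp
#include "llvm/MC/MCSubtargetInfo.h"
// The X86::Feature* enums come from the generated X86GenSubtargetInfo.inc,
// which in-tree code pulls in via the X86 MCTargetDesc headers.

// Hypothetical helper (not in the patch): materializing a 64-bit opmask
// with KMOVQkm requires AVX512BW (which introduces the 64-bit opmask
// instructions) *and* EVEX512, because AVX10.1-256 advertises BWI while
// dropping the quadword opmask operations.
static bool canMaterializeK64(const llvm::MCSubtargetInfo &STI) {
  const llvm::FeatureBitset &Bits = STI.getFeatureBits();
  return Bits[llvm::X86::FeatureBWI] && Bits[llvm::X86::FeatureEVEX512];
}
```

The same BWI-plus-EVEX512 predicate is what gates the quadword replacers in the X86DomainReassignment change, which is why the AVX10-256 test checks above combine two 256-bit `vptestm` masks with `kunpck`/`kshiftlb`+`korb` (or fall back to `vpor`+`vptest` for the all-zeros cases) instead of using a single 64-bit opmask operation.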