[llvm] [X86] Improve opmask handling for AVX10.1-256 (PR #73074)
Evgenii Kudriashov via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 23 20:44:06 PST 2023
https://github.com/e-kud updated https://github.com/llvm/llvm-project/pull/73074
From 35b27a238211f203f6c6c50a3b60f5b251524a35 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 7 Nov 2023 18:27:23 -0800
Subject: [PATCH 1/2] [X86] Improve opmask handling for AVX10.1-256
Quadword opmask instructions are only supported on processors that
support 512-bit vector lengths.
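
For illustration, a minimal IR sketch of the constraint (a hypothetical
reduced example, not taken from the patch's test files): with
-mattr=+avx512bw the 64-bit mask AND below can stay in the mask domain as
a single KANDQ, while with -mattr=+avx10.1-256 (no EVEX512) the quadword
opmask instructions are unavailable, so the mask has to be handled in
doubleword halves instead.

  ; Hypothetical reduced example; lowering behavior as described above.
  define i64 @and_mask64(i64 %a, i64 %b) {
    %ma = bitcast i64 %a to <64 x i1>
    %mb = bitcast i64 %b to <64 x i1>
    %r = and <64 x i1> %ma, %mb
    %ri = bitcast <64 x i1> %r to i64
    ret i64 %ri
  }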
---
llvm/lib/Target/X86/X86DomainReassignment.cpp | 35 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +-
llvm/lib/Target/X86/X86InstrInfo.cpp | 12 +-
llvm/lib/Target/X86/X86Subtarget.h | 3 +-
llvm/test/CodeGen/X86/avx512-mask-op.ll | 860 ++++++++++++-
llvm/test/CodeGen/X86/avx512-vec-cmp.ll | 1075 ++++++++++++++---
llvm/test/CodeGen/X86/avx512bw-mask-op.ll | 105 +-
llvm/test/CodeGen/X86/kshift.ll | 301 +++++
llvm/test/CodeGen/X86/movmsk-cmp.ll | 738 +++++++++++
llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 3 +-
10 files changed, 2900 insertions(+), 237 deletions(-)
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index fa8d5c752a3d273..a1681d9ff73ee5e 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -651,37 +651,30 @@ void X86DomainReassignment::initConverters() {
if (STI->hasBWI()) {
createReplacer(X86::MOV32rm, X86::KMOVDkm);
- createReplacer(X86::MOV64rm, X86::KMOVQkm);
-
createReplacer(X86::MOV32mr, X86::KMOVDmk);
- createReplacer(X86::MOV64mr, X86::KMOVQmk);
-
createReplacer(X86::MOV32rr, X86::KMOVDkk);
- createReplacer(X86::MOV64rr, X86::KMOVQkk);
-
createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
- createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
-
createReplacer(X86::SHL32ri, X86::KSHIFTLDri);
- createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
-
createReplacer(X86::ADD32rr, X86::KADDDrr);
- createReplacer(X86::ADD64rr, X86::KADDQrr);
-
createReplacer(X86::NOT32r, X86::KNOTDrr);
- createReplacer(X86::NOT64r, X86::KNOTQrr);
-
createReplacer(X86::OR32rr, X86::KORDrr);
- createReplacer(X86::OR64rr, X86::KORQrr);
-
createReplacer(X86::AND32rr, X86::KANDDrr);
- createReplacer(X86::AND64rr, X86::KANDQrr);
-
createReplacer(X86::ANDN32rr, X86::KANDNDrr);
- createReplacer(X86::ANDN64rr, X86::KANDNQrr);
-
createReplacer(X86::XOR32rr, X86::KXORDrr);
- createReplacer(X86::XOR64rr, X86::KXORQrr);
+
+ if (STI->hasEVEX512()) {
+ createReplacer(X86::MOV64rm, X86::KMOVQkm);
+ createReplacer(X86::MOV64mr, X86::KMOVQmk);
+ createReplacer(X86::MOV64rr, X86::KMOVQkk);
+ createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
+ createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
+ createReplacer(X86::ADD64rr, X86::KADDQrr);
+ createReplacer(X86::NOT64r, X86::KNOTQrr);
+ createReplacer(X86::OR64rr, X86::KORQrr);
+ createReplacer(X86::AND64rr, X86::KANDQrr);
+ createReplacer(X86::ANDN64rr, X86::KANDNQrr);
+ createReplacer(X86::XOR64rr, X86::KXORQrr);
+ }
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 011baa545dd82fe..1ad3ac1f7b89c03 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2056,9 +2056,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// AVX512BW..
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
- addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
+ if (Subtarget.hasEVEX512())
+ addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
+ if (VT == MVT::v64i1 && !Subtarget.hasEVEX512())
+ continue;
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 3ca7b427ae2067f..9424319cf7dfcb9 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3526,7 +3526,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// All KMASK RegClasses hold the same k registers, can be tested against anyone.
if (X86::VK16RegClass.contains(SrcReg)) {
if (X86::GR64RegClass.contains(DestReg)) {
- assert(Subtarget.hasBWI());
+ assert(Subtarget.hasBWI() && Subtarget.hasEVEX512());
return X86::KMOVQrk;
}
if (X86::GR32RegClass.contains(DestReg))
@@ -3539,7 +3539,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// All KMASK RegClasses hold the same k registers, can be tested against anyone.
if (X86::VK16RegClass.contains(DestReg)) {
if (X86::GR64RegClass.contains(SrcReg)) {
- assert(Subtarget.hasBWI());
+ assert(Subtarget.hasBWI() && Subtarget.hasEVEX512());
return X86::KMOVQkr;
}
if (X86::GR32RegClass.contains(SrcReg))
@@ -3653,7 +3653,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = X86::VMOVAPSZrr;
// All KMASK RegClasses hold the same k registers, can be tested against anyone.
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
- Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
+ Opc = Subtarget.hasBWI() && Subtarget.hasEVEX512() ? X86::KMOVQkk
+ : X86::KMOVWkk;
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
@@ -3773,7 +3774,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
if (X86::RFP64RegClass.hasSubClassEq(RC))
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
if (X86::VK64RegClass.hasSubClassEq(RC)) {
- assert(STI.hasBWI() && "KMOVQ requires BWI");
+ assert(STI.hasBWI() && STI.hasEVEX512() &&
+ "KMOVQ requires BWI with 512-bit vectors");
return Load ? X86::KMOVQkm : X86::KMOVQmk;
}
llvm_unreachable("Unknown 8-byte regclass");
@@ -10144,7 +10146,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
return;
// KXOR is safe to use because it doesn't affect flags.
- unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
+ unsigned Op = ST.hasBWI() && ST.hasEVEX512() ? X86::KXORQrr : X86::KXORWrr;
BuildMI(MBB, Iter, DL, get(Op), Reg)
.addReg(Reg, RegState::Undef)
.addReg(Reg, RegState::Undef);
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index a458b5f9ec8fbb9..47d24f4be58a3e1 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -244,7 +244,8 @@ class X86Subtarget final : public X86GenSubtargetInfo {
// TODO: Currently we're always allowing widening on CPUs without VLX,
// because for many cases we don't have a better option.
bool canExtendTo512DQ() const {
- return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
+ return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512) &&
+ hasEVEX512();
}
bool canExtendTo512BW() const {
return hasBWI() && canExtendTo512DQ();
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 9e689341f7b88e3..99eef49417f33b6 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=CHECK,AVX10-256
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
@@ -131,6 +132,13 @@ define void @mask8_mem(ptr %ptr) {
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: mask8_mem:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: knotb %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: mask8_mem:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -205,6 +213,15 @@ define i16 @mand16_mem(ptr %x, ptr %y) {
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: mand16_mem:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovw (%rdi), %k0
+; AVX10-256-NEXT: kmovw (%rsi), %k1
+; AVX10-256-NEXT: korw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: mand16_mem:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -257,6 +274,14 @@ define i8 @shuf_test1(i16 %v) nounwind {
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: shuf_test1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: kshiftrw $8, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: shuf_test1:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -304,6 +329,15 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: zext_test1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: andl $1, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: zext_test1:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
@@ -359,6 +393,16 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: zext_test2:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: andl $1, %eax
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: zext_test2:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
@@ -415,6 +459,16 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: zext_test3:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: andb $1, %al
+; AVX10-256-NEXT: ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: zext_test3:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
@@ -506,6 +560,14 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test4:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
+; AVX10-256-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1}
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test4:
; X86: ## %bb.0:
; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
@@ -567,6 +629,13 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test5:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
+; AVX10-256-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1}
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test5:
; X86: ## %bb.0:
; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
@@ -645,6 +714,14 @@ define void @test7(<8 x i1> %mask) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test7:
+; AVX10-256: ## %bb.0: ## %allocas
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: orb $85, %al
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test7:
; X86: ## %bb.0: ## %allocas
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
@@ -732,6 +809,24 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test8:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: jg LBB17_1
+; AVX10-256-NEXT: ## %bb.2:
+; AVX10-256-NEXT: kxorw %k0, %k0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB17_1:
+; AVX10-256-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX10-256-NEXT: vpcmpgtd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vpcmpgtd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test8:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -821,6 +916,20 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test9:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: jg LBB18_1
+; AVX10-256-NEXT: ## %bb.2:
+; AVX10-256-NEXT: vpsllw $7, %xmm1, %xmm0
+; AVX10-256-NEXT: jmp LBB18_3
+; AVX10-256-NEXT: LBB18_1:
+; AVX10-256-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT: LBB18_3:
+; AVX10-256-NEXT: vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test9:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -907,6 +1016,20 @@ define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test10:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: jg LBB19_1
+; AVX10-256-NEXT: ## %bb.2:
+; AVX10-256-NEXT: vpsllw $15, %xmm1, %xmm0
+; AVX10-256-NEXT: jmp LBB19_3
+; AVX10-256-NEXT: LBB19_1:
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: LBB19_3:
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test10:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -989,6 +1112,20 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test11:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: jg LBB20_1
+; AVX10-256-NEXT: ## %bb.2:
+; AVX10-256-NEXT: vpslld $31, %xmm1, %xmm0
+; AVX10-256-NEXT: jmp LBB20_3
+; AVX10-256-NEXT: LBB20_1:
+; AVX10-256-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT: LBB20_3:
+; AVX10-256-NEXT: vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test11:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1120,6 +1257,16 @@ define <16 x i1> @test15(i32 %x, i32 %y) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test15:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: cmpl %esi, %edi
+; AVX10-256-NEXT: movl $21845, %eax ## imm = 0x5555
+; AVX10-256-NEXT: movl $1, %ecx
+; AVX10-256-NEXT: cmovgl %eax, %ecx
+; AVX10-256-NEXT: kmovd %ecx, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test15:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1231,6 +1378,23 @@ define <64 x i8> @test16(i64 %x) {
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test16:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: shrq $32, %rdi
+; AVX10-256-NEXT: kmovd %edi, %k1
+; AVX10-256-NEXT: movl $-33, %eax
+; AVX10-256-NEXT: kmovd %eax, %k2
+; AVX10-256-NEXT: kandd %k2, %k0, %k0
+; AVX10-256-NEXT: movb $1, %al
+; AVX10-256-NEXT: kmovd %eax, %k2
+; AVX10-256-NEXT: kshiftld $31, %k2, %k2
+; AVX10-256-NEXT: kshiftrd $26, %k2, %k2
+; AVX10-256-NEXT: kord %k2, %k0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test16:
; X86: ## %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
@@ -1350,6 +1514,24 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test17:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: shrq $32, %rdi
+; AVX10-256-NEXT: kmovd %edi, %k1
+; AVX10-256-NEXT: cmpl %edx, %esi
+; AVX10-256-NEXT: setg %al
+; AVX10-256-NEXT: movl $-33, %ecx
+; AVX10-256-NEXT: kmovd %ecx, %k2
+; AVX10-256-NEXT: kandd %k2, %k0, %k0
+; AVX10-256-NEXT: kmovd %eax, %k2
+; AVX10-256-NEXT: kshiftld $31, %k2, %k2
+; AVX10-256-NEXT: kshiftrd $26, %k2, %k2
+; AVX10-256-NEXT: kord %k2, %k0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test17:
; X86: ## %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
@@ -1455,6 +1637,24 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test18:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: kmovd %esi, %k1
+; AVX10-256-NEXT: kshiftrw $8, %k1, %k2
+; AVX10-256-NEXT: kshiftrw $9, %k1, %k1
+; AVX10-256-NEXT: movb $-65, %al
+; AVX10-256-NEXT: kmovd %eax, %k3
+; AVX10-256-NEXT: kandb %k3, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $6, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $7, %k2, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test18:
; X86: ## %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
@@ -1521,6 +1721,15 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; AVX512DQ-NEXT: vpandq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test21:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %ymm2, %ymm2
+; AVX10-256-NEXT: vpmovb2m %ymm2, %k1
+; AVX10-256-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z}
+; AVX10-256-NEXT: kshiftrd $16, %k1, %k1
+; AVX10-256-NEXT: vmovdqu16 %ymm1, %ymm1 {%k1} {z}
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test21:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %ymm1, %ymm1
@@ -1571,6 +1780,13 @@ define void @test22(<4 x i1> %a, ptr %addr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test22:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test22:
; X86: ## %bb.0:
; X86-NEXT: vpslld $31, %xmm0, %xmm0
@@ -1622,6 +1838,13 @@ define void @test23(<2 x i1> %a, ptr %addr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test23:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllq $63, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovq2m %xmm0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test23:
; X86: ## %bb.0:
; X86-NEXT: vpsllq $63, %xmm0, %xmm0
@@ -1672,6 +1895,15 @@ define void @store_v1i1(<1 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: kmovb %k0, (%rsi)
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v1i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: knotw %k0, %k0
+; AVX10-256-NEXT: kshiftlb $7, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $7, %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rsi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v1i1:
; X86: ## %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
@@ -1730,6 +1962,16 @@ define void @store_v2i1(<2 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v2i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllq $63, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovq2m %xmm0, %k0
+; AVX10-256-NEXT: knotw %k0, %k0
+; AVX10-256-NEXT: kshiftlb $6, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $6, %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v2i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllq $63, %xmm0, %xmm0
@@ -1789,6 +2031,16 @@ define void @store_v4i1(<4 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v4i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT: knotw %k0, %k0
+; AVX10-256-NEXT: kshiftlb $4, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $4, %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v4i1:
; X86: ## %bb.0:
; X86-NEXT: vpslld $31, %xmm0, %xmm0
@@ -1843,6 +2095,14 @@ define void @store_v8i1(<8 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v8i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: knotb %k0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v8i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
@@ -1893,6 +2153,14 @@ define void @store_v16i1(<16 x i1> %c , ptr %ptr) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v16i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT: knotw %k0, %k0
+; AVX10-256-NEXT: kmovw %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v16i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %xmm0, %xmm0
@@ -2008,6 +2276,12 @@ define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_build_vec_v32i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_build_vec_v32i1:
; X86: ## %bb.0:
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
@@ -2041,6 +2315,12 @@ define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_build_vec_v32i1_optsize:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_build_vec_v32i1_optsize:
; X86: ## %bb.0:
; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
@@ -2076,6 +2356,12 @@ define <32 x i16> @test_build_vec_v32i1_pgso(<32 x i16> %x) !prof !14 {
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_build_vec_v32i1_pgso:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_build_vec_v32i1_pgso:
; X86: ## %bb.0:
; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
@@ -2107,6 +2393,12 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_build_vec_v64i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_build_vec_v64i1:
; X86: ## %bb.0:
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
@@ -2182,6 +2474,31 @@ define void @ktest_1(<8 x double> %in, ptr %base) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi), %ymm0, %k1
+; AVX10-256-NEXT: vcmpgtpd 32(%rdi), %ymm1, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k0
+; AVX10-256-NEXT: korb %k0, %k1, %k0
+; AVX10-256-NEXT: vmovupd 40(%rdi), %ymm2 {%k2} {z}
+; AVX10-256-NEXT: vmovupd 8(%rdi), %ymm3 {%k1} {z}
+; AVX10-256-NEXT: vcmpltpd %ymm3, %ymm0, %k1
+; AVX10-256-NEXT: vcmpltpd %ymm2, %ymm1, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: ktestb %k1, %k0
+; AVX10-256-NEXT: je LBB44_2
+; AVX10-256-NEXT: ## %bb.1: ## %L1
+; AVX10-256-NEXT: vmovapd %ymm0, (%rdi)
+; AVX10-256-NEXT: vmovapd %ymm1, 32(%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB44_2: ## %L2
+; AVX10-256-NEXT: vmovapd %ymm0, 8(%rdi)
+; AVX10-256-NEXT: vmovapd %ymm1, 40(%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2316,6 +2633,43 @@ define void @ktest_2(<32 x float> %in, ptr %base) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_2:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi), %ymm0, %k1
+; AVX10-256-NEXT: vcmpgtps 32(%rdi), %ymm1, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k0
+; AVX10-256-NEXT: vcmpgtps 64(%rdi), %ymm2, %k3
+; AVX10-256-NEXT: vcmpgtps 96(%rdi), %ymm3, %k4
+; AVX10-256-NEXT: kunpckbw %k3, %k4, %k5
+; AVX10-256-NEXT: kunpckwd %k0, %k5, %k0
+; AVX10-256-NEXT: vmovups 100(%rdi), %ymm4 {%k4} {z}
+; AVX10-256-NEXT: vmovups 68(%rdi), %ymm5 {%k3} {z}
+; AVX10-256-NEXT: vmovups 36(%rdi), %ymm6 {%k2} {z}
+; AVX10-256-NEXT: vmovups 4(%rdi), %ymm7 {%k1} {z}
+; AVX10-256-NEXT: vcmpltps %ymm7, %ymm0, %k1
+; AVX10-256-NEXT: vcmpltps %ymm6, %ymm1, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: vcmpltps %ymm5, %ymm2, %k2
+; AVX10-256-NEXT: vcmpltps %ymm4, %ymm3, %k3
+; AVX10-256-NEXT: kunpckbw %k2, %k3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kortestd %k1, %k0
+; AVX10-256-NEXT: je LBB45_2
+; AVX10-256-NEXT: ## %bb.1: ## %L1
+; AVX10-256-NEXT: vmovaps %ymm0, (%rdi)
+; AVX10-256-NEXT: vmovaps %ymm1, 32(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm2, 64(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm3, 96(%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB45_2: ## %L2
+; AVX10-256-NEXT: vmovaps %ymm0, 4(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm1, 36(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm2, 68(%rdi)
+; AVX10-256-NEXT: vmovaps %ymm3, 100(%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_2:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2390,6 +2744,14 @@ define <8 x i64> @load_8i1(ptr %a) {
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_8i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: vpmovm2q %k0, %ymm0
+; AVX10-256-NEXT: kshiftrb $4, %k0, %k0
+; AVX10-256-NEXT: vpmovm2q %k0, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_8i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2426,6 +2788,14 @@ define <16 x i32> @load_16i1(ptr %a) {
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_16i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: kmovb 1(%rdi), %k1
+; AVX10-256-NEXT: vpmovm2d %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2d %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_16i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2472,6 +2842,12 @@ define <2 x i16> @load_2i1(ptr %a) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_2i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_2i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2518,6 +2894,12 @@ define <4 x i16> @load_4i1(ptr %a) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_4i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovb (%rdi), %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_4i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2564,6 +2946,14 @@ define <32 x i16> @load_32i1(ptr %a) {
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_32i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovw (%rdi), %k0
+; AVX10-256-NEXT: kmovw 2(%rdi), %k1
+; AVX10-256-NEXT: vpmovm2w %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2w %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_32i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2626,6 +3016,14 @@ define <64 x i8> @load_64i1(ptr %a) {
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: load_64i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd (%rdi), %k0
+; AVX10-256-NEXT: kmovd 4(%rdi), %k1
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: load_64i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2673,6 +3071,13 @@ define void @store_8i1(ptr %a, <8 x i1> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_8i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_8i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
@@ -2720,6 +3125,13 @@ define void @store_8i1_1(ptr %a, <8 x i16> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_8i1_1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kmovb %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_8i1_1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2766,6 +3178,13 @@ define void @store_16i1(ptr %a, <16 x i1> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_16i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT: vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT: kmovw %k0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_16i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %xmm0, %xmm0
@@ -2822,6 +3241,14 @@ define void @store_32i1(ptr %a, <32 x i1> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_32i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: kmovd %k0, (%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_32i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %ymm0, %ymm0
@@ -2879,6 +3306,17 @@ define void @store_32i1_1(ptr %a, <32 x i16> %v) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_32i1_1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: vpsllw $15, %ymm1, %ymm0
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kmovd %k0, (%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_32i1_1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -3787,6 +4225,17 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_64i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: vpsllw $7, %ymm1, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k1
+; AVX10-256-NEXT: kmovd %k1, 4(%rdi)
+; AVX10-256-NEXT: kmovd %k0, (%rdi)
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_64i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %zmm0, %zmm0
@@ -3834,6 +4283,14 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: test_bitcast_v8i1_zext:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kmovb %k0, %eax
+; AVX10-256-NEXT: addl %eax, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: test_bitcast_v8i1_zext:
; X86: ## %bb.0:
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
@@ -3850,13 +4307,47 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
}
define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
-; CHECK-LABEL: test_bitcast_v16i1_zext:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; KNL-LABEL: test_bitcast_v16i1_zext:
+; KNL: ## %bb.0:
+; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: addl %eax, %eax
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_bitcast_v16i1_zext:
+; SKX: ## %bb.0:
+; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: addl %eax, %eax
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: test_bitcast_v16i1_zext:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: kmovw %k0, %eax
+; AVX512BW-NEXT: addl %eax, %eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: test_bitcast_v16i1_zext:
+; AVX512DQ: ## %bb.0:
+; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: addl %eax, %eax
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX10-256-LABEL: test_bitcast_v16i1_zext:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kmovw %k0, %eax
+; AVX10-256-NEXT: addl %eax, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
;
; X86-LABEL: test_bitcast_v16i1_zext:
; X86: ## %bb.0:
@@ -4066,6 +4557,27 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_signed:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpor %ymm3, %ymm1, %ymm1
+; AVX10-256-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: testw %ax, %ax
+; AVX10-256-NEXT: jle LBB66_1
+; AVX10-256-NEXT: ## %bb.2: ## %bb.2
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB66_1: ## %bb.1
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_signed:
; X86: ## %bb.0:
; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
@@ -4099,21 +4611,88 @@ declare void @foo()
; Make sure we can use the ZF/CF flags from kortest to check for all ones.
define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
-; CHECK-LABEL: ktest_allones:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0
-; CHECK-NEXT: kortestw %k0, %k0
-; CHECK-NEXT: je LBB67_2
-; CHECK-NEXT: ## %bb.1: ## %bb.1
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: callq _foo
-; CHECK-NEXT: addq $8, %rsp
-; CHECK-NEXT: LBB67_2: ## %bb.2
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; KNL-LABEL: ktest_allones:
+; KNL: ## %bb.0:
+; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: je LBB67_2
+; KNL-NEXT: ## %bb.1: ## %bb.1
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: callq _foo
+; KNL-NEXT: addq $8, %rsp
+; KNL-NEXT: LBB67_2: ## %bb.2
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: ktest_allones:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; SKX-NEXT: kortestw %k0, %k0
+; SKX-NEXT: je LBB67_2
+; SKX-NEXT: ## %bb.1: ## %bb.1
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: callq _foo
+; SKX-NEXT: addq $8, %rsp
+; SKX-NEXT: LBB67_2: ## %bb.2
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: ktest_allones:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: je LBB67_2
+; AVX512BW-NEXT: ## %bb.1: ## %bb.1
+; AVX512BW-NEXT: pushq %rax
+; AVX512BW-NEXT: .cfi_def_cfa_offset 16
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: callq _foo
+; AVX512BW-NEXT: addq $8, %rsp
+; AVX512BW-NEXT: LBB67_2: ## %bb.2
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: ktest_allones:
+; AVX512DQ: ## %bb.0:
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT: kortestw %k0, %k0
+; AVX512DQ-NEXT: je LBB67_2
+; AVX512DQ-NEXT: ## %bb.1: ## %bb.1
+; AVX512DQ-NEXT: pushq %rax
+; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: callq _foo
+; AVX512DQ-NEXT: addq $8, %rsp
+; AVX512DQ-NEXT: LBB67_2: ## %bb.2
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX10-256-LABEL: ktest_allones:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpor %ymm3, %ymm1, %ymm1
+; AVX10-256-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: jb LBB67_2
+; AVX10-256-NEXT: ## %bb.1: ## %bb.1
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: LBB67_2: ## %bb.2
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
;
; X86-LABEL: ktest_allones:
; X86: ## %bb.0:
@@ -4182,6 +4761,14 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: mask_widening:
+; AVX10-256: ## %bb.0: ## %entry
+; AVX10-256-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
+; AVX10-256-NEXT: vpblendmd %ymm6, %ymm4, %ymm0 {%k1}
+; AVX10-256-NEXT: kshiftrw $8, %k1, %k1
+; AVX10-256-NEXT: vpblendmd %ymm7, %ymm5, %ymm1 {%k1}
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: mask_widening:
; X86: ## %bb.0: ## %entry
; X86-NEXT: pushl %ebp
@@ -4239,6 +4826,12 @@ define void @store_v128i1_constant(ptr %R) {
; AVX512DQ-NEXT: vmovaps %xmm0, (%rdi)
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: store_v128i1_constant:
+; AVX10-256: ## %bb.0: ## %entry
+; AVX10-256-NEXT: vmovaps {{.*#+}} xmm0 = [4294963197,3758096251,4294959101,3221225403]
+; AVX10-256-NEXT: vmovaps %xmm0, (%rdi)
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: store_v128i1_constant:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4302,13 +4895,48 @@ entry:
; Make sure we bring the -1 constant into the mask domain.
define void @mask_not_cast(ptr, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
-; CHECK-LABEL: mask_not_cast:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
-; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
-; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; KNL-LABEL: mask_not_cast:
+; KNL: ## %bb.0:
+; KNL-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
+; KNL-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
+; KNL-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+;
+; SKX-LABEL: mask_not_cast:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
+; SKX-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
+; SKX-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: mask_not_cast:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
+; AVX512BW-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
+; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: mask_not_cast:
+; AVX512DQ: ## %bb.0:
+; AVX512DQ-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
+; AVX512DQ-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
+; AVX512DQ-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX10-256-LABEL: mask_not_cast:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleud %ymm6, %ymm4, %k1
+; AVX10-256-NEXT: vpcmpnleud %ymm7, %ymm5, %k2
+; AVX10-256-NEXT: vptestmd %ymm1, %ymm3, %k2 {%k2}
+; AVX10-256-NEXT: vmovdqu32 %ymm1, 32(%rdi) {%k2}
+; AVX10-256-NEXT: vptestmd %ymm0, %ymm2, %k1 {%k1}
+; AVX10-256-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1}
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
;
; X86-LABEL: mask_not_cast:
; X86: ## %bb.0:
@@ -4436,6 +5064,27 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_3:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: ktestb %k1, %k0
+; AVX10-256-NEXT: je LBB74_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB74_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_3:
; X86: ## %bb.0:
; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
@@ -4564,6 +5213,39 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_4:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm4, %ymm4, %k1
+; AVX10-256-NEXT: vptestnmq %ymm5, %ymm5, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: vptestnmq %ymm6, %ymm6, %k2
+; AVX10-256-NEXT: vptestnmq %ymm7, %ymm7, %k3
+; AVX10-256-NEXT: kshiftlb $4, %k3, %k3
+; AVX10-256-NEXT: korb %k3, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: ktestb %k1, %k0
+; AVX10-256-NEXT: je LBB75_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB75_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_4:
; X86: ## %bb.0:
; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
@@ -4690,6 +5372,35 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_5:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: korw %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm4, %ymm4, %k1
+; AVX10-256-NEXT: vptestnmd %ymm5, %ymm5, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm6, %ymm6, %k2
+; AVX10-256-NEXT: vptestnmd %ymm7, %ymm7, %k3
+; AVX10-256-NEXT: kunpckbw %k2, %k3, %k2
+; AVX10-256-NEXT: korw %k2, %k1, %k1
+; AVX10-256-NEXT: ktestw %k1, %k0
+; AVX10-256-NEXT: je LBB76_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB76_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_5:
; X86: ## %bb.0:
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
@@ -4850,6 +5561,35 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_6:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kord %k1, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm4, %ymm4, %k1
+; AVX10-256-NEXT: vptestnmw %ymm5, %ymm5, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm6, %ymm6, %k2
+; AVX10-256-NEXT: vptestnmw %ymm7, %ymm7, %k3
+; AVX10-256-NEXT: kunpckwd %k2, %k3, %k2
+; AVX10-256-NEXT: kord %k2, %k1, %k1
+; AVX10-256-NEXT: ktestd %k1, %k0
+; AVX10-256-NEXT: je LBB77_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB77_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_6:
; X86: ## %bb.0:
; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
@@ -5006,6 +5746,35 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; AVX512DQ-NEXT: addq $8, %rsp
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ktest_7:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: vptestnmb %ymm0, %ymm0, %k1
+; AVX10-256-NEXT: vptestnmb %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kord %k2, %k0, %k0
+; AVX10-256-NEXT: vptestnmb %ymm2, %ymm2, %k2
+; AVX10-256-NEXT: kord %k2, %k1, %k1
+; AVX10-256-NEXT: vptestnmb %ymm5, %ymm5, %k2
+; AVX10-256-NEXT: vptestnmb %ymm4, %ymm4, %k3
+; AVX10-256-NEXT: vptestnmb %ymm7, %ymm7, %k4
+; AVX10-256-NEXT: kord %k4, %k2, %k2
+; AVX10-256-NEXT: kandd %k2, %k0, %k0
+; AVX10-256-NEXT: vptestnmb %ymm6, %ymm6, %k2
+; AVX10-256-NEXT: kord %k2, %k3, %k2
+; AVX10-256-NEXT: kandd %k2, %k1, %k1
+; AVX10-256-NEXT: kortestd %k0, %k1
+; AVX10-256-NEXT: je LBB78_1
+; AVX10-256-NEXT: ## %bb.2: ## %exit
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: LBB78_1: ## %bar
+; AVX10-256-NEXT: pushq %rax
+; AVX10-256-NEXT: .cfi_def_cfa_offset 16
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: callq _foo
+; AVX10-256-NEXT: addq $8, %rsp
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ktest_7:
; X86: ## %bb.0:
; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
@@ -5104,6 +5873,21 @@ define <64 x i1> @mask64_insert(i32 %a) {
; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: mask64_insert:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %edi, %k0
+; AVX10-256-NEXT: kshiftld $31, %k0, %k0
+; AVX10-256-NEXT: kshiftrd $31, %k0, %k0
+; AVX10-256-NEXT: movl $-131076, %eax ## imm = 0xFFFDFFFC
+; AVX10-256-NEXT: kmovd %eax, %k1
+; AVX10-256-NEXT: kshiftrd $1, %k1, %k1
+; AVX10-256-NEXT: kshiftld $1, %k1, %k1
+; AVX10-256-NEXT: kord %k0, %k1, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: ## ymm1 = mem[0,1,0,1]
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: mask64_insert:
; X86: ## %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
@@ -5240,6 +6024,15 @@ define <1 x i1> @usub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: usub_sat_v1i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %esi, %k0
+; AVX10-256-NEXT: kmovd %edi, %k1
+; AVX10-256-NEXT: kandnw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: usub_sat_v1i1:
; X86: ## %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
@@ -5309,6 +6102,15 @@ define <1 x i1> @ssub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
;
+; AVX10-256-LABEL: ssub_sat_v1i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd %esi, %k0
+; AVX10-256-NEXT: kmovd %edi, %k1
+; AVX10-256-NEXT: kandnw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
+;
; X86-LABEL: ssub_sat_v1i1:
; X86: ## %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index e4c62fca5bd57aa..0736a559987e673 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1,36 +1,79 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,KNL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10-256
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test1:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
+; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test1:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
+; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpleps %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xcb,0x02]
+; AVX10-256-NEXT: vcmpleps %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xd2,0x02]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max
}
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
-; CHECK-LABEL: test2:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test2:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
+; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test2:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
+; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test2:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmplepd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0xcb,0x02]
+; AVX10-256-NEXT: vcmplepd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xd2,0x02]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max
}
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test3:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
+; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test3:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
+; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test3:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT: vpcmpeqd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x17]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, ptr %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -38,33 +81,75 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
}
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test4_unsigned:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
-; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test4_unsigned:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
+; AVX512-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test4_unsigned:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
+; SKX-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test4_unsigned:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnltud %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1e,0xcb,0x05]
+; AVX10-256-NEXT: vpcmpnltud %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd2,0x05]
+; AVX10-256-NEXT: vpblendmd %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc4]
+; AVX10-256-NEXT: vpblendmd %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xcd]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: test5:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test5:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
+; AVX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test5:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
+; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test5:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcb]
+; AVX10-256-NEXT: vpcmpeqq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xd2]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max
}
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
-; CHECK-LABEL: test6_unsigned:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
-; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test6_unsigned:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
+; AVX512-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test6_unsigned:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
+; SKX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test6_unsigned:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnleuq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1e,0xcb,0x06]
+; AVX10-256-NEXT: vpcmpnleuq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd2,0x06]
+; AVX10-256-NEXT: vpblendmq %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc4]
+; AVX10-256-NEXT: vpblendmq %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xcd]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
@@ -88,6 +173,13 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test7:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
+; AVX10-256-NEXT: vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp olt <4 x float> %a, zeroinitializer
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
@@ -112,6 +204,13 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test8:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x57,0xd2]
+; AVX10-256-NEXT: vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp olt <2 x double> %a, zeroinitializer
%c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
ret <2 x double>%c
@@ -132,6 +231,12 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test9:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@@ -152,6 +257,12 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test10:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
@@ -168,6 +279,11 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; SKX: ## %bb.0:
; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test11_unsigned:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@@ -203,6 +319,22 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqq %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc4]
+; AVX10-256-NEXT: vpcmpeqq %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcd]
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT: korb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x45,0xc1]
+; AVX10-256-NEXT: vpcmpeqq %ymm6, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x29,0xce]
+; AVX10-256-NEXT: vpcmpeqq %ymm7, %ymm3, %k2 ## encoding: [0x62,0xf2,0xe5,0x28,0x29,0xd7]
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2 ## encoding: [0xc4,0xe3,0x79,0x32,0xd2,0x04]
+; AVX10-256-NEXT: korb %k2, %k1, %k1 ## encoding: [0xc5,0xf5,0x45,0xca]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
ret i16 %res1
@@ -237,6 +369,19 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12_v32i32:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc4]
+; AVX10-256-NEXT: vpcmpeqd %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0xcd]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: vpcmpeqd %ymm6, %ymm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x28,0x76,0xce]
+; AVX10-256-NEXT: vpcmpeqd %ymm7, %ymm3, %k2 ## encoding: [0x62,0xf1,0x65,0x28,0x76,0xd7]
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1 ## encoding: [0xc5,0xed,0x4b,0xc9]
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <32 x i32> %a, %b
%res1 = bitcast <32 x i1> %res to i32
ret i32 %res1
@@ -291,6 +436,21 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; SKX-NEXT: kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12_v64i16:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqw %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc4]
+; AVX10-256-NEXT: vpcmpeqw %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x75,0xcd]
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
+; AVX10-256-NEXT: vpcmpeqw %ymm6, %ymm2, %k0 ## encoding: [0x62,0xf1,0x6d,0x28,0x75,0xc6]
+; AVX10-256-NEXT: vpcmpeqw %ymm7, %ymm3, %k1 ## encoding: [0x62,0xf1,0x65,0x28,0x75,0xcf]
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: shlq $32, %rax ## encoding: [0x48,0xc1,0xe0,0x20]
+; AVX10-256-NEXT: orq %rcx, %rax ## encoding: [0x48,0x09,0xc8]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <64 x i16> %a, %b
%res1 = bitcast <64 x i1> %res to i64
ret i64 %res1
@@ -310,6 +470,17 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
; SKX-NEXT: vpmovm2d %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test13:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpeqps %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0xc2,0xc2,0x00]
+; AVX10-256-NEXT: vbroadcastss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2 ## EVEX TO VEX Compression ymm2 = [1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: ## encoding: [0xc4,0xe2,0x7d,0x18,0x15,A,A,A,A]
+; AVX10-256-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX10-256-NEXT: vandps %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc2]
+; AVX10-256-NEXT: vcmpeqps %ymm3, %ymm1, %ymm1 ## encoding: [0xc5,0xf4,0xc2,0xcb,0x00]
+; AVX10-256-NEXT: vandps %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x54,0xca]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
{
%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
@@ -317,12 +488,29 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
}
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
-; CHECK-LABEL: test14:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
-; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test14:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
+; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
+; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test14:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
+; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
+; SKX-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test14:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsubd %ymm2, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xd2]
+; AVX10-256-NEXT: vpsubd %ymm3, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfa,0xdb]
+; AVX10-256-NEXT: vpcmpgtd %ymm1, %ymm3, %k1 ## encoding: [0x62,0xf1,0x65,0x28,0x66,0xc9]
+; AVX10-256-NEXT: vpcmpgtd %ymm0, %ymm2, %k2 ## encoding: [0x62,0xf1,0x6d,0x28,0x66,0xd0]
+; AVX10-256-NEXT: vmovdqa32 %ymm2, %ymm0 {%k2} {z} ## encoding: [0x62,0xf1,0x7d,0xaa,0x6f,0xc2]
+; AVX10-256-NEXT: vmovdqa32 %ymm3, %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0xcb]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
@@ -332,12 +520,29 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
}
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
-; CHECK-LABEL: test15:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
-; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test15:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
+; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
+; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test15:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
+; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
+; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test15:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsubq %ymm2, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfb,0xd2]
+; AVX10-256-NEXT: vpsubq %ymm3, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfb,0xdb]
+; AVX10-256-NEXT: vpcmpgtq %ymm1, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x37,0xc9]
+; AVX10-256-NEXT: vpcmpgtq %ymm0, %ymm2, %k2 ## encoding: [0x62,0xf2,0xed,0x28,0x37,0xd0]
+; AVX10-256-NEXT: vmovdqa64 %ymm2, %ymm0 {%k2} {z} ## encoding: [0x62,0xf1,0xfd,0xaa,0x6f,0xc2]
+; AVX10-256-NEXT: vmovdqa64 %ymm3, %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0xcb]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
@@ -347,22 +552,50 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
}
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test16:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
-; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test16:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
+; AVX512-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test16:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
+; SKX-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test16:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnltd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xcb,0x05]
+; AVX10-256-NEXT: vpcmpnltd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd2,0x05]
+; AVX10-256-NEXT: vpblendmd %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc4]
+; AVX10-256-NEXT: vpblendmd %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xcd]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test17:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test17:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
+; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test17:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
+; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test17:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0x4f,0x01]
+; AVX10-256-NEXT: vpcmpgtd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0x17]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, ptr %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -370,11 +603,25 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test18:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test18:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
+; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test18:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
+; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test18:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpled 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0x4f,0x01,0x02]
+; AVX10-256-NEXT: vpcmpled (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0x17,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, ptr %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -382,11 +629,25 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test19:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test19:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
+; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test19:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
+; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test19:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpleud 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1e,0x4f,0x01,0x02]
+; AVX10-256-NEXT: vpcmpleud (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0x17,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, ptr %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -394,12 +655,29 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test20:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
-; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test20:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
+; AVX512-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
+; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test20:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
+; SKX-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
+; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test20:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0xcb]
+; AVX10-256-NEXT: vpcmpeqd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xd2]
+; AVX10-256-NEXT: vpcmpeqd %ymm6, %ymm4, %k2 {%k2} ## encoding: [0x62,0xf1,0x5d,0x2a,0x76,0xd6]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpeqd %ymm7, %ymm5, %k1 {%k1} ## encoding: [0x62,0xf1,0x55,0x29,0x76,0xcf]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@@ -408,12 +686,29 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
}
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test21:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
-; CHECK-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test21:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
+; AVX512-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
+; AVX512-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test21:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
+; SKX-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
+; SKX-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test21:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpleq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xcb,0x02]
+; AVX10-256-NEXT: vpcmpleq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd2,0x02]
+; AVX10-256-NEXT: vpcmpnltq %ymm6, %ymm4, %k2 {%k2} ## encoding: [0x62,0xf3,0xdd,0x2a,0x1f,0xd6,0x05]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm4, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xdd,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpnltq %ymm7, %ymm5, %k1 {%k1} ## encoding: [0x62,0xf3,0xd5,0x29,0x1f,0xcf,0x05]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm5, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xd5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@@ -422,12 +717,29 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
}
define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test22:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
-; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test22:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
+; AVX512-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
+; AVX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test22:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
+; SKX-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
+; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test22:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtq %ymm5, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x37,0xcd]
+; AVX10-256-NEXT: vpcmpgtq %ymm4, %ymm2, %k2 ## encoding: [0x62,0xf2,0xed,0x28,0x37,0xd4]
+; AVX10-256-NEXT: vpcmpgtq (%rdi), %ymm0, %k2 {%k2} ## encoding: [0x62,0xf2,0xfd,0x2a,0x37,0x17]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpgtq 32(%rdi), %ymm1, %k1 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x37,0x4f,0x01]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, ptr %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
@@ -437,12 +749,29 @@ define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1)
}
define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test23:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test23:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; AVX512-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
+; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test23:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; SKX-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
+; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test23:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpnltd %ymm5, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcd,0x05]
+; AVX10-256-NEXT: vpcmpnltd %ymm4, %ymm2, %k2 ## encoding: [0x62,0xf3,0x6d,0x28,0x1f,0xd4,0x05]
+; AVX10-256-NEXT: vpcmpleud (%rdi), %ymm0, %k2 {%k2} ## encoding: [0x62,0xf3,0x7d,0x2a,0x1e,0x17,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpleud 32(%rdi), %ymm1, %k1 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x1e,0x4f,0x01,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, ptr %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
@@ -452,11 +781,26 @@ define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32>
}
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
-; CHECK-LABEL: test24:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test24:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
+; AVX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test24:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
+; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test24:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpbroadcastq (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x27]
+; AVX10-256-NEXT: vpcmpeqq %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcc]
+; AVX10-256-NEXT: vpcmpeqq %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xd4]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%yb = load i64, ptr %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -466,11 +810,26 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
}
define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test25:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test25:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
+; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test25:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
+; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test25:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpbroadcastd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x27]
+; AVX10-256-NEXT: vpcmpled %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xcc,0x02]
+; AVX10-256-NEXT: vpcmpled %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd4,0x02]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%yb = load i32, ptr %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -480,12 +839,30 @@ define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
}
define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test26:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test26:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; AVX512-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
+; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test26:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; SKX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
+; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test26:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpbroadcastd (%rdi), %ymm6 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x37]
+; AVX10-256-NEXT: vpcmpgtd %ymm6, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0xce]
+; AVX10-256-NEXT: vpcmpgtd %ymm6, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xd6]
+; AVX10-256-NEXT: vpcmpnltd %ymm4, %ymm2, %k2 {%k2} ## encoding: [0x62,0xf3,0x6d,0x2a,0x1f,0xd4,0x05]
+; AVX10-256-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpnltd %ymm5, %ymm3, %k1 {%k1} ## encoding: [0x62,0xf3,0x65,0x29,0x1f,0xcd,0x05]
+; AVX10-256-NEXT: vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, ptr %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
@@ -497,12 +874,30 @@ define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32>
}
define <8 x i64> @test27(<8 x i64> %x, ptr %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test27:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test27:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
+; AVX512-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
+; AVX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test27:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
+; SKX-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
+; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test27:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpbroadcastq (%rdi), %ymm6 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x37]
+; AVX10-256-NEXT: vpcmpleq %ymm6, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xce,0x02]
+; AVX10-256-NEXT: vpcmpleq %ymm6, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd6,0x02]
+; AVX10-256-NEXT: vpcmpnltq %ymm4, %ymm2, %k2 {%k2} ## encoding: [0x62,0xf3,0xed,0x2a,0x1f,0xd4,0x05]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT: vpcmpnltq %ymm5, %ymm3, %k1 {%k1} ## encoding: [0x62,0xf3,0xe5,0x29,0x1f,0xcd,0x05]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, ptr %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
@@ -530,6 +925,20 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1
; SKX-NEXT: kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
; SKX-NEXT: vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test28:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtq %ymm2, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc2]
+; AVX10-256-NEXT: vpcmpgtq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x37,0xcb]
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT: korb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x45,0xc1]
+; AVX10-256-NEXT: vpcmpgtq %ymm6, %ymm4, %k1 ## encoding: [0x62,0xf2,0xdd,0x28,0x37,0xce]
+; AVX10-256-NEXT: vpcmpgtq %ymm7, %ymm5, %k2 ## encoding: [0x62,0xf2,0xd5,0x28,0x37,0xd7]
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2 ## encoding: [0xc4,0xe3,0x79,0x32,0xd2,0x04]
+; AVX10-256-NEXT: korb %k2, %k1, %k1 ## encoding: [0xc5,0xf5,0x45,0xca]
+; AVX10-256-NEXT: kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
+; AVX10-256-NEXT: vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%x_gt_y = icmp sgt <8 x i64> %x, %y
%x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
%res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
@@ -566,6 +975,19 @@ define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32>
; SKX-NEXT: vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test29:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpgtd %ymm2, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc2]
+; AVX10-256-NEXT: vpcmpgtd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0xcb]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: vpcmpgtd %ymm6, %ymm4, %k1 ## encoding: [0x62,0xf1,0x5d,0x28,0x66,0xce]
+; AVX10-256-NEXT: vpcmpgtd %ymm7, %ymm5, %k2 ## encoding: [0x62,0xf1,0x55,0x28,0x66,0xd7]
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1 ## encoding: [0xc5,0xed,0x4b,0xc9]
+; AVX10-256-NEXT: kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%x_gt_y = icmp sgt <16 x i32> %x, %y
%x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
%res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
@@ -588,6 +1010,12 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test30:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <4 x double> %x, %y
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
@@ -611,6 +1039,12 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, ptr %yp) nounwind
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test31:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <2 x double>, ptr %yp, align 4
%mask = fcmp olt <2 x double> %x, %y
@@ -635,6 +1069,12 @@ define <2 x double> @test31_commute(<2 x double> %x, <2 x double> %x1, ptr %yp)
; SKX-NEXT: vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test31_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <2 x double>, ptr %yp, align 4
%mask = fcmp olt <2 x double> %y, %x
@@ -658,6 +1098,12 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, ptr %yp) nounwind
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test32:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <4 x double>, ptr %yp, align 4
%mask = fcmp ogt <4 x double> %y, %x
@@ -681,6 +1127,12 @@ define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, ptr %yp)
; SKX-NEXT: vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test32_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <4 x double>, ptr %yp, align 4
%mask = fcmp ogt <4 x double> %x, %y
@@ -689,11 +1141,25 @@ define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, ptr %yp)
}
define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test33:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test33:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
+; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test33:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
+; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test33:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0x4f,0x01,0x01]
+; AVX10-256-NEXT: vcmpltpd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x17,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <8 x double>, ptr %yp, align 4
%mask = fcmp olt <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
@@ -701,11 +1167,25 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind
}
define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test33_commute:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test33_commute:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
+; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test33_commute:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
+; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test33_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0x4f,0x01,0x0e]
+; AVX10-256-NEXT: vcmpgtpd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x17,0x0e]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <8 x double>, ptr %yp, align 4
%mask = fcmp olt <8 x double> %y, %x
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
@@ -729,6 +1209,12 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, ptr %yp) nounwind {
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test34:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <4 x float>, ptr %yp, align 4
%mask = fcmp olt <4 x float> %x, %y
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
@@ -752,6 +1238,12 @@ define <4 x float> @test34_commute(<4 x float> %x, <4 x float> %x1, ptr %yp) nou
; SKX-NEXT: vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test34_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <4 x float>, ptr %yp, align 4
%mask = fcmp olt <4 x float> %y, %x
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
@@ -774,6 +1266,12 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, ptr %yp) nounwind {
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test35:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <8 x float>, ptr %yp, align 4
%mask = fcmp ogt <8 x float> %y, %x
@@ -797,6 +1295,12 @@ define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, ptr %yp) nou
; SKX-NEXT: vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test35_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <8 x float>, ptr %yp, align 4
%mask = fcmp ogt <8 x float> %x, %y
@@ -805,11 +1309,25 @@ define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, ptr %yp) nou
}
define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test36:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test36:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
+; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test36:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
+; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test36:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0x4f,0x01,0x01]
+; AVX10-256-NEXT: vcmpltps (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x17,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x float>, ptr %yp, align 4
%mask = fcmp olt <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
@@ -817,11 +1335,25 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind
}
define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test36_commute:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test36_commute:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
+; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test36_commute:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
+; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test36_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0x4f,0x01,0x0e]
+; AVX10-256-NEXT: vcmpgtps (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x17,0x0e]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%y = load <16 x float>, ptr %yp, align 4
%mask = fcmp olt <16 x float> %y, %x
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
@@ -829,11 +1361,26 @@ define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp)
}
define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test37:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test37:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
+; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test37:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
+; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test37:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT: vcmpltpd %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0xcc,0x01]
+; AVX10-256-NEXT: vcmpltpd %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xd4,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@@ -845,11 +1392,26 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwin
}
define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test37_commute:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test37_commute:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
+; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test37_commute:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
+; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test37_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT: vcmpltpd %ymm1, %ymm4, %k1 ## encoding: [0x62,0xf1,0xdd,0x28,0xc2,0xc9,0x01]
+; AVX10-256-NEXT: vcmpltpd %ymm0, %ymm4, %k2 ## encoding: [0x62,0xf1,0xdd,0x28,0xc2,0xd0,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@@ -875,6 +1437,12 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, ptr %ptr) nounwin
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test38:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
@@ -900,6 +1468,12 @@ define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, ptr %ptr)
; SKX-NEXT: vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test38_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
@@ -926,6 +1500,12 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, ptr %ptr) nounwin
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test39:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
@@ -952,6 +1532,12 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, ptr %ptr)
; SKX-NEXT: vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test39_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
@@ -964,11 +1550,26 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, ptr %ptr)
define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test40:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test40:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
+; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test40:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
+; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test40:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vbroadcastss (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x27]
+; AVX10-256-NEXT: vcmpltps %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xcc,0x01]
+; AVX10-256-NEXT: vcmpltps %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xd4,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <16 x float> undef, float %a, i32 0
@@ -980,11 +1581,26 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, ptr %ptr) noun
}
define <16 x float> @test40_commute(<16 x float> %x, <16 x float> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test40_commute:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
+; AVX512-LABEL: test40_commute:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
+; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; AVX512-NEXT: retq ## encoding: [0xc3]
+;
+; SKX-LABEL: test40_commute:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
+; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test40_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vbroadcastss (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x27]
+; AVX10-256-NEXT: vcmpltps %ymm1, %ymm4, %k1 ## encoding: [0x62,0xf1,0x5c,0x28,0xc2,0xc9,0x01]
+; AVX10-256-NEXT: vcmpltps %ymm0, %ymm4, %k2 ## encoding: [0x62,0xf1,0x5c,0x28,0xc2,0xd0,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <16 x float> undef, float %a, i32 0
@@ -1010,6 +1626,12 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, ptr %ptr) nounwin
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test41:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <8 x float> undef, float %a, i32 0
@@ -1035,6 +1657,12 @@ define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, ptr %ptr)
; SKX-NEXT: vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test41_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <8 x float> undef, float %a, i32 0
@@ -1061,6 +1689,12 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, ptr %ptr) nounwin
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test42:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
@@ -1087,6 +1721,12 @@ define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, ptr %ptr)
; SKX-NEXT: vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test42_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load float, ptr %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
@@ -1122,6 +1762,18 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, ptr %ptr,<8 x i1>
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test43:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0x71,0xf4,0x0f]
+; AVX10-256-NEXT: vpmovw2m %xmm4, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xcc]
+; AVX10-256-NEXT: vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT: vcmpltpd %ymm4, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc2,0xd4,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT: vcmpltpd %ymm4, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0xc2,0xcc,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@@ -1158,6 +1810,18 @@ define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr,
; SKX-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test43_commute:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $15, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0x71,0xf4,0x0f]
+; AVX10-256-NEXT: vpmovw2m %xmm4, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xcc]
+; AVX10-256-NEXT: vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT: vcmpltpd %ymm0, %ymm4, %k2 {%k1} ## encoding: [0x62,0xf1,0xdd,0x29,0xc2,0xd0,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT: kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT: vcmpltpd %ymm1, %ymm4, %k1 {%k1} ## encoding: [0x62,0xf1,0xdd,0x29,0xc2,0xc9,0x01]
+; AVX10-256-NEXT: vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = load double, ptr %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@@ -1181,6 +1845,12 @@ define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; SKX-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test44:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <4 x i16> %x, %y
%1 = sext <4 x i1> %mask to <4 x i32>
ret <4 x i32> %1
@@ -1202,6 +1872,13 @@ define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test45:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX10-256-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <2 x i16> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
ret <2 x i64> %1
@@ -1223,6 +1900,13 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test46:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX10-256-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <2 x float> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
ret <2 x i64> %1
@@ -1254,6 +1938,15 @@ define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
; SKX-NEXT: vpblendmb %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x66,0xc1]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test47:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc0]
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x76,0x28,0x27,0xc9]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k1 ## encoding: [0xc5,0xf5,0x4b,0xc8]
+; AVX10-256-NEXT: vpblendmb %xmm2, %xmm3, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x65,0x09,0x66,0xc2]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <16 x i32> %a, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %c
ret <16 x i8> %res
@@ -1282,6 +1975,14 @@ define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) {
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
; SKX-NEXT: vpblendmw %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x66,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test48:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc0]
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x76,0x28,0x27,0xc9]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k1 ## encoding: [0xc5,0xf5,0x4b,0xc8]
+; AVX10-256-NEXT: vpblendmw %ymm2, %ymm3, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x66,0xc2]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <16 x i32> %a, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %c
ret <16 x i16> %res
@@ -1313,6 +2014,16 @@ define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) {
; SKX-NEXT: vpblendmw %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x66,0xc1]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test49:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT: korb %k1, %k0, %k1 ## encoding: [0xc5,0xfd,0x45,0xc9]
+; AVX10-256-NEXT: vpblendmw %xmm2, %xmm3, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xe5,0x09,0x66,0xc2]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <8 x i64> %a, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %c
ret <8 x i16> %res
@@ -1342,6 +2053,16 @@ define i16 @pcmpeq_mem_1(<16 x i32> %a, ptr %b) {
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: pcmpeq_mem_1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x07]
+; AVX10-256-NEXT: vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%load = load <16 x i32>, ptr %b
%cmp = icmp eq <16 x i32> %a, %load
%cast = bitcast <16 x i1> %cmp to i16
@@ -1374,6 +2095,16 @@ define i16 @pcmpeq_mem_2(<16 x i32> %a, ptr %b) {
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: pcmpeq_mem_2:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x07]
+; AVX10-256-NEXT: vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT: ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%load = load <16 x i32>, ptr %b
%cmp = icmp eq <16 x i32> %load, %a
%cast = bitcast <16 x i1> %cmp to i16
@@ -1394,6 +2125,11 @@ define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
; SKX: ## %bb.0:
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: PR41066:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%t1 = fcmp ogt <2 x double> %x, %y
%t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
ret <2 x i64> %t2
@@ -1421,6 +2157,16 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32>
; SKX-NEXT: vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: zext_bool_logic:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
+; AVX10-256-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX10-256-NEXT: vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
+; AVX10-256-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%a = icmp eq <4 x i64> %cond1, zeroinitializer
%b = icmp eq <4 x i64> %cond2, zeroinitializer
%c = or <4 x i1> %a, %b
@@ -1531,6 +2277,17 @@ define void @half_vec_compare(ptr %x, ptr %y) {
; SKX-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; SKX-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: half_vec_compare:
+; AVX10-256: ## %bb.0: ## %entry
+; AVX10-256-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
+; AVX10-256-NEXT: ## xmm0 = mem[0],zero,zero,zero
+; AVX10-256-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
+; AVX10-256-NEXT: vcmpneqph %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc9,0x04]
+; AVX10-256-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x05,A,A,A,A]
+; AVX10-256-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX10-256-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
entry:
%0 = load <2 x half>, ptr %x
%1 = fcmp une <2 x half> %0, zeroinitializer
@@ -1571,6 +2328,16 @@ define <8 x i64> @cmp_swap_bug(ptr %x, <8 x i64> %y, <8 x i64> %z) {
; SKX-NEXT: vpmovb2m %xmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xca]
; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: cmp_swap_bug:
+; AVX10-256: ## %bb.0: ## %entry
+; AVX10-256-NEXT: vmovdqa (%rdi), %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x27]
+; AVX10-256-NEXT: vpmovwb %xmm4, %xmm4 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xe4]
+; AVX10-256-NEXT: vpmovb2m %xmm4, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xcc]
+; AVX10-256-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x64,0xc0]
+; AVX10-256-NEXT: kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT: vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
entry:
%0 = load <16 x i8>, ptr %x
%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1594,6 +2361,12 @@ define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; SKX-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: narrow_cmp_select_reverse:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
+; AVX10-256-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
+; AVX10-256-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <2 x i64> %x, zeroinitializer
%res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y
ret <2 x i32> %res
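
An aside on the AVX10-256 output above (e.g. test37/test40): without 512-bit vector support, each 512-bit compare-and-select is legalized as two independent 256-bit halves, each producing its own opmask register. A minimal C++ model of that split, not part of the patch (hypothetical helper name):

#include <cstddef>
// Hypothetical model of the split lowering checked in test37 above:
// mask = (x < broadcast(a)), result = mask ? x : x1, performed per
// 256-bit (4 x double) half, mirroring the ymm/k1,k2 pairs in the
// CHECK lines.
void blend8xf64_lt_bcast(const double *x, const double *x1, double a,
                         double *out) {
  for (size_t half = 0; half < 2; ++half) { // low half -> k2, high -> k1
    for (size_t i = 0; i < 4; ++i) {
      size_t j = half * 4 + i;
      bool m = x[j] < a;                    // vcmpltpd %ymm4, %ymmN, %kM
      out[j] = m ? x[j] : x1[j];            // vblendmpd ... {%kM}
    }
  }
}
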
diff --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
index 2a262644836135b..3c91c2948fc9094 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=CHECK,AVX10-256
define i32 @mask32(i32 %x) {
; CHECK-LABEL: mask32:
@@ -54,12 +55,22 @@ define void @mask32_mem(ptr %ptr) {
}
define void @mask64_mem(ptr %ptr) {
-; CHECK-LABEL: mask64_mem:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq (%rdi), %k0
-; CHECK-NEXT: knotq %k0, %k0
-; CHECK-NEXT: kmovq %k0, (%rdi)
-; CHECK-NEXT: retq
+; SKX-LABEL: mask64_mem:
+; SKX: ## %bb.0:
+; SKX-NEXT: kmovq (%rdi), %k0
+; SKX-NEXT: knotq %k0, %k0
+; SKX-NEXT: kmovq %k0, (%rdi)
+; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: mask64_mem:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd (%rdi), %k0
+; AVX10-256-NEXT: kmovd 4(%rdi), %k1
+; AVX10-256-NEXT: knotd %k1, %k1
+; AVX10-256-NEXT: knotd %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, (%rdi)
+; AVX10-256-NEXT: kmovd %k1, 4(%rdi)
+; AVX10-256-NEXT: retq
%x = load i64, ptr %ptr, align 4
%m0 = bitcast i64 %x to <64 x i1>
%m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -123,13 +134,27 @@ define i64 @mand64(i64 %x, i64 %y) {
}
define i64 @mand64_mem(ptr %x, ptr %y) {
-; CHECK-LABEL: mand64_mem:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq (%rdi), %k0
-; CHECK-NEXT: kmovq (%rsi), %k1
-; CHECK-NEXT: korq %k1, %k0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; SKX-LABEL: mand64_mem:
+; SKX: ## %bb.0:
+; SKX-NEXT: kmovq (%rdi), %k0
+; SKX-NEXT: kmovq (%rsi), %k1
+; SKX-NEXT: korq %k1, %k0, %k0
+; SKX-NEXT: kmovq %k0, %rax
+; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: mand64_mem:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: kmovd (%rdi), %k0
+; AVX10-256-NEXT: kmovd 4(%rdi), %k1
+; AVX10-256-NEXT: kmovd (%rsi), %k2
+; AVX10-256-NEXT: kord %k2, %k0, %k0
+; AVX10-256-NEXT: kmovd 4(%rsi), %k2
+; AVX10-256-NEXT: kord %k2, %k1, %k1
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: kmovd %k1, %eax
+; AVX10-256-NEXT: shlq $32, %rax
+; AVX10-256-NEXT: orq %rcx, %rax
+; AVX10-256-NEXT: retq
%ma = load <64 x i1>, ptr %x
%mb = load <64 x i1>, ptr %y
%mc = and <64 x i1> %ma, %mb
@@ -229,12 +254,22 @@ define <32 x i1> @bitcast_f32_to_v32i1(float %x) {
}
define <64 x i1> @bitcast_f64_to_v64i1(double %x) {
-; CHECK-LABEL: bitcast_f64_to_v64i1:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovq %xmm0, %rax
-; CHECK-NEXT: kmovq %rax, %k0
-; CHECK-NEXT: vpmovm2b %k0, %zmm0
-; CHECK-NEXT: retq
+; SKX-LABEL: bitcast_f64_to_v64i1:
+; SKX: ## %bb.0:
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: kmovq %rax, %k0
+; SKX-NEXT: vpmovm2b %k0, %zmm0
+; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: bitcast_f64_to_v64i1:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vmovq %xmm0, %rax
+; AVX10-256-NEXT: kmovd %eax, %k0
+; AVX10-256-NEXT: shrq $32, %rax
+; AVX10-256-NEXT: kmovd %eax, %k1
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT: retq
%a = bitcast double %x to <64 x i1>
ret <64 x i1> %a
}
@@ -252,14 +287,28 @@ define float @bitcast_v32i1_to_f32(<32 x i1> %x) {
}
define double @bitcast_v64i1_to_f64(<64 x i1> %x) {
-; CHECK-LABEL: bitcast_v64i1_to_f64:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0
-; CHECK-NEXT: vpmovb2m %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vmovq %rax, %xmm0
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; SKX-LABEL: bitcast_v64i1_to_f64:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
+; SKX-NEXT: vpmovb2m %zmm0, %k0
+; SKX-NEXT: kmovq %k0, %rax
+; SKX-NEXT: vmovq %rax, %xmm0
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: bitcast_v64i1_to_f64:
+; AVX10-256: ## %bb.0:
+; AVX10-256-NEXT: vpsllw $7, %ymm1, %ymm1
+; AVX10-256-NEXT: vpmovb2m %ymm1, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: shlq $32, %rax
+; AVX10-256-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: orq %rax, %rcx
+; AVX10-256-NEXT: vmovq %rcx, %xmm0
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = bitcast <64 x i1> %x to double
ret double %a
}
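
The mask64_mem and mand64_mem checks above show the general AVX10-256 strategy for i64 masks: with no quadword opmask instructions available, the value is handled as two doubleword halves, recombined in a GPR only when a scalar i64 result is needed. A C++ sketch of that decomposition, not part of the patch (hypothetical function names):

#include <cstdint>
// Hypothetical model of the mask64_mem lowering above: each 32-bit half
// is loaded (kmovd), inverted (knotd), and stored back independently.
void mask64_mem_model(uint32_t *ptr) {
  uint32_t lo = ptr[0]; // kmovd (%rdi), %k0
  uint32_t hi = ptr[1]; // kmovd 4(%rdi), %k1
  ptr[0] = ~lo;         // knotd %k0, %k0 ; kmovd %k0, (%rdi)
  ptr[1] = ~hi;         // knotd %k1, %k1 ; kmovd %k1, 4(%rdi)
}
// GPR recombination used when a scalar i64 result is returned
// (mand64_mem, bitcast_v64i1_to_f64): shlq $32 on the high half, orq.
uint64_t combine_halves_model(uint32_t lo, uint32_t hi) {
  return (uint64_t)hi << 32 | lo;
}
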
diff --git a/llvm/test/CodeGen/X86/kshift.ll b/llvm/test/CodeGen/X86/kshift.ll
index 0acf82f5a144a2e..16444adb1dc568b 100644
--- a/llvm/test/CodeGen/X86/kshift.ll
+++ b/llvm/test/CodeGen/X86/kshift.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq,avx512bw | FileCheck %s --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx10.1-256 | FileCheck %s --check-prefix=AVX10-256
define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
; KNL-LABEL: kshiftl_v8i1_1:
@@ -22,6 +23,23 @@ define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -50,6 +68,21 @@ define i16 @kshiftl_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v16i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kshiftlw $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: kandw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
%c = icmp eq <16 x i32> %y, zeroinitializer
@@ -96,6 +129,20 @@ define i32 @kshiftl_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v32i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kshiftld $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kandd %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <32 x i16> %x, zeroinitializer
%b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
%c = icmp eq <32 x i16> %y, zeroinitializer
@@ -166,6 +213,25 @@ define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v64i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: vptestnmb %ymm0, %ymm0, %k1
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm1
+; AVX10-256-NEXT: vmovdqa {{.*#+}} ymm4 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX10-256-NEXT: vpermi2b %ymm0, %ymm1, %ymm4
+; AVX10-256-NEXT: vpmovb2m %ymm4, %k2
+; AVX10-256-NEXT: kshiftld $1, %k1, %k1
+; AVX10-256-NEXT: vptestnmb %ymm3, %ymm3, %k0 {%k2}
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: shlq $32, %rcx
+; AVX10-256-NEXT: vptestnmb %ymm2, %ymm2, %k0 {%k1}
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: orq %rcx, %rax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <64 x i8> %x, zeroinitializer
%b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 64, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
%c = icmp eq <64 x i8> %y, zeroinitializer
@@ -194,6 +260,20 @@ define i8 @kshiftl_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_7:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kshiftlb $7, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> zeroinitializer, <8 x i1> %a, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -222,6 +302,19 @@ define i16 @kshiftl_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v16i1_15:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kshiftlw $15, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: kandw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
%c = icmp eq <16 x i32> %y, zeroinitializer
@@ -255,6 +348,18 @@ define i32 @kshiftl_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v32i1_31:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kshiftld $31, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kandd %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <32 x i16> %x, zeroinitializer
%b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
%c = icmp eq <32 x i16> %y, zeroinitializer
@@ -291,6 +396,16 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v64i1_63:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: kshiftld $31, %k0, %k1
+; AVX10-256-NEXT: vptestnmb %ymm3, %ymm3, %k0 {%k1}
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: shlq $32, %rax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <64 x i8> %x, zeroinitializer
%b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
%c = icmp eq <64 x i8> %y, zeroinitializer
@@ -320,6 +435,23 @@ define i8 @kshiftr_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -348,6 +480,21 @@ define i16 @kshiftr_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v16i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kshiftrw $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: kandw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
%c = icmp eq <16 x i32> %y, zeroinitializer
@@ -394,6 +541,20 @@ define i32 @kshiftr_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v32i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kshiftrd $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kandd %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <32 x i16> %x, zeroinitializer
%b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
%c = icmp eq <32 x i16> %y, zeroinitializer
@@ -464,6 +625,25 @@ define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v64i1_1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmb %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: vpmovm2b %k1, %ymm0
+; AVX10-256-NEXT: vpmovm2b %k0, %ymm1
+; AVX10-256-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32]
+; AVX10-256-NEXT: vpermi2b %ymm0, %ymm1, %ymm4
+; AVX10-256-NEXT: vpmovb2m %ymm4, %k2
+; AVX10-256-NEXT: kshiftrd $1, %k1, %k1
+; AVX10-256-NEXT: vptestnmb %ymm2, %ymm2, %k0 {%k2}
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: vptestnmb %ymm3, %ymm3, %k0 {%k1}
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: shlq $32, %rax
+; AVX10-256-NEXT: orq %rcx, %rax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <64 x i8> %x, zeroinitializer
%b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
%c = icmp eq <64 x i8> %y, zeroinitializer
@@ -492,6 +672,23 @@ define i8 @kshiftr_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_7:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -520,6 +717,20 @@ define i16 @kshiftr_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v16i1_15:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmd %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: kshiftlw $8, %k0, %k0
+; AVX10-256-NEXT: kshiftrw $15, %k0, %k0
+; AVX10-256-NEXT: vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT: kandw %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <16 x i32> %x, zeroinitializer
%b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
%c = icmp eq <16 x i32> %y, zeroinitializer
@@ -552,6 +763,19 @@ define i32 @kshiftr_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v32i1_31:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmw %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: kshiftld $16, %k0, %k0
+; AVX10-256-NEXT: kshiftrd $31, %k0, %k0
+; AVX10-256-NEXT: vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT: kandd %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <32 x i16> %x, zeroinitializer
%b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 63, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
%c = icmp eq <32 x i16> %y, zeroinitializer
@@ -586,6 +810,15 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v64i1_63:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT: kshiftrd $31, %k0, %k1
+; AVX10-256-NEXT: vptestnmb %ymm2, %ymm2, %k0 {%k1}
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <64 x i8> %x, zeroinitializer
%b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 127, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
%c = icmp eq <64 x i8> %y, zeroinitializer
@@ -614,6 +847,23 @@ define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_zu123u56:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 5, i32 6>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -642,6 +892,23 @@ define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_u0123456:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -671,6 +938,23 @@ define i8 @kshiftr_v8i1_1u3u567z(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_1u3u567z:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 6, i32 7, i32 8>
%c = icmp eq <8 x i64> %y, zeroinitializer
@@ -699,6 +983,23 @@ define i8 @kshiftr_v8i1_234567uu(<8 x i64> %x, <8 x i64> %y) {
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_234567uu:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kshiftrb $2, %k0, %k0
+; AVX10-256-NEXT: vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT: vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT: kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT: korb %k2, %k1, %k1
+; AVX10-256-NEXT: kandb %k1, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp eq <8 x i64> %x, zeroinitializer
%b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10>
%c = icmp eq <8 x i64> %y, zeroinitializer
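
Two mask-concatenation idioms recur throughout the kshift checks above: kunpckbw builds a 16-bit mask from two 8-bit half-masks, while kshiftlb $4 plus korb builds an 8-bit mask from two 4-bit half-masks. A C++ model of both, not part of the patch (hypothetical names):

#include <cstdint>
// Hypothetical model of kunpckbw %k0, %k1, %k0: the low byte comes from
// the mask of the low 256-bit half, the high byte from the high half.
uint16_t kunpckbw_model(uint8_t lo, uint8_t hi) {
  return (uint16_t)((uint16_t)hi << 8 | lo);
}
// Hypothetical model of kshiftlb $4, %k2, %k2 ; korb %k2, %k1, %k1 as
// used for two 4-element (v4i64/v4f64) masks.
uint8_t concat_nibbles_model(uint8_t lo4, uint8_t hi4) {
  return (uint8_t)(hi4 << 4 | lo4);
}
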
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index f26bbb7e5c2bdac..ecd833bbe6e5aaa 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX,AVX10-256
define i1 @allones_v16i8_sign(<16 x i8> %arg) {
; SSE-LABEL: allones_v16i8_sign:
@@ -80,6 +81,14 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i8_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovmskb %ymm0, %eax
+; AVX10-256-NEXT: cmpl $-1, %eax
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <32 x i8> %arg, zeroinitializer
%tmp1 = bitcast <32 x i1> %tmp to i32
%tmp2 = icmp eq i32 %tmp1, -1
@@ -120,6 +129,14 @@ define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i8_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovmskb %ymm0, %eax
+; AVX10-256-NEXT: testl %eax, %eax
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <32 x i8> %arg, zeroinitializer
%tmp1 = bitcast <32 x i1> %tmp to i32
%tmp2 = icmp eq i32 %tmp1, 0
@@ -176,6 +193,15 @@ define i1 @allones_v64i8_sign(<64 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v64i8_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <64 x i8> %arg, zeroinitializer
%tmp1 = bitcast <64 x i1> %tmp to i64
%tmp2 = icmp eq i64 %tmp1, -1
@@ -232,6 +258,15 @@ define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <64 x i8> %arg, zeroinitializer
%tmp1 = bitcast <64 x i1> %tmp to i64
%tmp2 = icmp eq i64 %tmp1, 0
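
A note on the kortest idiom in these movmsk tests: kortest ORs its two operands and sets ZF when the result is all zeros and CF when it is all ones, so sete answers "all lanes clear?" and setb answers "all lanes set?". A C++ model, not part of the patch (hypothetical names):

#include <cstdint>
// Hypothetical model of kortestd flag semantics as used above:
// ZF = ((a | b) == 0), CF = ((a | b) == 0xffffffff).
bool allzeros_model(uint32_t a, uint32_t b) {
  return (a | b) == 0;          // kortestd ; sete
}
bool allones_model(uint32_t a, uint32_t b) {
  return (a | b) == UINT32_MAX; // kortestd ; setb
}
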
@@ -273,6 +308,13 @@ define i1 @allones_v8i16_sign(<8 x i16> %arg) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i16> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, -1
@@ -311,6 +353,13 @@ define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i16> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, 0
@@ -364,6 +413,14 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <16 x i16> %arg, zeroinitializer
%tmp1 = bitcast <16 x i1> %tmp to i16
%tmp2 = icmp eq i16 %tmp1, -1
@@ -415,6 +472,14 @@ define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <16 x i16> %arg, zeroinitializer
%tmp1 = bitcast <16 x i1> %tmp to i16
%tmp2 = icmp eq i16 %tmp1, 0
@@ -479,6 +544,16 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovw2m %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <32 x i16> %arg, zeroinitializer
%tmp1 = bitcast <32 x i1> %tmp to i32
%tmp2 = icmp eq i32 %tmp1, -1
@@ -539,6 +614,16 @@ define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovw2m %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <32 x i16> %arg, zeroinitializer
%tmp1 = bitcast <32 x i1> %tmp to i32
%tmp2 = icmp eq i32 %tmp1, 0
@@ -618,6 +703,14 @@ define i1 @allones_v8i32_sign(<8 x i32> %arg) {
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i32_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX10-256-NEXT: vtestps %ymm1, %ymm0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i32> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, -1
@@ -697,6 +790,16 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i32_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovd2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovd2m %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <16 x i32> %arg, zeroinitializer
%tmp1 = bitcast <16 x i1> %tmp to i16
%tmp2 = icmp eq i16 %tmp1, -1
@@ -755,6 +858,16 @@ define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovd2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovd2m %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <16 x i32> %arg, zeroinitializer
%tmp1 = bitcast <16 x i1> %tmp to i16
%tmp2 = icmp eq i16 %tmp1, 0
@@ -794,6 +907,14 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) {
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i64_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX10-256-NEXT: vtestpd %ymm1, %ymm0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <4 x i64> %arg, zeroinitializer
%tmp1 = bitcast <4 x i1> %tmp to i4
%tmp2 = icmp eq i4 %tmp1, -1
@@ -872,6 +993,17 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i64_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovq2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovq2m %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i64> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, -1
@@ -926,6 +1058,17 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_sign:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovq2m %ymm0, %k0
+; AVX10-256-NEXT: vpmovq2m %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i64> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, 0
@@ -963,6 +1106,13 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1099,6 +1249,14 @@ define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_not:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vptest %ymm0, %ymm0
+; AVX10-256-NEXT: setne %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%1 = icmp eq <8 x i64> %a0, zeroinitializer
%2 = bitcast <8 x i1> %1 to i8
%3 = icmp ne i8 %2, -1
@@ -1138,6 +1296,13 @@ define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1191,6 +1356,14 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1237,6 +1410,14 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1298,6 +1479,16 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v64i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastb {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: vptestmb %ymm2, %ymm0, %k1
+; AVX10-256-NEXT: vptestmb %ymm2, %ymm1, %k0 {%k1}
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -1350,6 +1541,15 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -1394,6 +1594,13 @@ define i1 @allones_v8i16_and1(<8 x i16> %arg) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1434,6 +1641,13 @@ define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1493,6 +1707,14 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1565,6 +1787,17 @@ define i1 @allones_v32i16_and1(<32 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: vptestmw %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmw %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1617,6 +1850,15 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1663,6 +1905,14 @@ define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i16_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1704,6 +1954,14 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) {
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $15, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -1744,6 +2002,13 @@ define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v4i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -1800,6 +2065,14 @@ define i1 @allones_v8i32_and1(<8 x i32> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1846,6 +2119,14 @@ define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1905,6 +2186,17 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT: vptestmd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1957,6 +2249,15 @@ define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1999,6 +2300,14 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) {
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v2i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <2 x i64> %arg, <i64 1, i64 1>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -2039,6 +2348,13 @@ define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v2i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <2 x i64> %arg, <i64 1, i64 1>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -2095,6 +2411,15 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $15, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -2141,6 +2466,14 @@ define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v4i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -2208,6 +2541,18 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
+; AVX10-256-NEXT: vptestmq %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmq %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2261,6 +2606,15 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_and1:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2299,6 +2653,13 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2339,6 +2700,13 @@ define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [289360691352306692,289360691352306692]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2392,6 +2760,14 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2438,6 +2814,14 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2499,6 +2883,16 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v64i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastb {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT: vptestmb %ymm2, %ymm0, %k1
+; AVX10-256-NEXT: vptestmb %ymm2, %ymm1, %k0 {%k1}
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -2551,6 +2945,15 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -2595,6 +2998,13 @@ define i1 @allones_v8i16_and4(<8 x i16> %arg) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: retq
%tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2635,6 +3045,13 @@ define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1125917086973956,1125917086973956]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2694,6 +3111,14 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2766,6 +3191,17 @@ define i1 @allones_v32i16_and4(<32 x i16> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v32i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastw {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT: vptestmw %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmw %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT: kortestd %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2818,6 +3254,15 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2864,6 +3309,14 @@ define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i16_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2905,6 +3358,14 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) {
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $15, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -2945,6 +3406,13 @@ define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v4i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17179869188,17179869188]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -3001,6 +3469,14 @@ define i1 @allones_v8i32_and4(<8 x i32> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3047,6 +3523,14 @@ define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3106,6 +3590,17 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v16i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT: vptestmd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT: kortestw %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -3158,6 +3653,15 @@ define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -3200,6 +3704,14 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) {
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v2i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <2 x i64> %arg, <i64 4, i64 4>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -3240,6 +3752,13 @@ define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
; SKX-NEXT: vptest %xmm1, %xmm0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v2i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
+; AVX10-256-NEXT: vptest %xmm1, %xmm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%tmp = and <2 x i64> %arg, <i64 4, i64 4>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -3296,6 +3815,15 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) {
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v4i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $15, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -3342,6 +3870,14 @@ define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v4i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -3409,6 +3945,18 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) {
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allones_v8i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4,4,4,4]
+; AVX10-256-NEXT: vptestmq %ymm2, %ymm0, %k0
+; AVX10-256-NEXT: vptestmq %ymm2, %ymm1, %k1
+; AVX10-256-NEXT: kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT: korb %k1, %k0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setb %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3462,6 +4010,15 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_and4:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
+; AVX10-256-NEXT: vptest %ymm1, %ymm0
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3510,6 +4067,15 @@ define i1 @allzeros_v8f32_nnan(<8 x float> %a0) {
; SKX-NEXT: setne %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: allzeros_v8f32_nnan:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX10-256-NEXT: vcmpneqps %ymm1, %ymm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setne %al
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%1 = fcmp nnan une <8 x float> %a0, zeroinitializer
%2 = bitcast <8 x i1> %1 to i8
%3 = icmp ne i8 %2, 0
@@ -3639,6 +4205,12 @@ define i32 @movmskb256(<32 x i8> %x) {
; AVX512-NEXT: vpmovmskb %ymm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
+;
+; AVX10-256-LABEL: movmskb256:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpmovmskb %ymm0, %eax
+; AVX10-256-NEXT: vzeroupper
+; AVX10-256-NEXT: retq
%a = icmp slt <32 x i8> %x, zeroinitializer
%b = bitcast <32 x i1> %a to i32
ret i32 %b
@@ -3709,6 +4281,20 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SKX-NEXT: andb %cl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v16i8:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: kshiftrw $15, %k0, %k1
+; AVX10-256-NEXT: kmovd %k1, %ecx
+; AVX10-256-NEXT: kshiftrw $8, %k0, %k1
+; AVX10-256-NEXT: kmovd %k1, %edx
+; AVX10-256-NEXT: kshiftrw $3, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: xorb %dl, %al
+; AVX10-256-NEXT: andb %cl, %al
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
%cmp = icmp eq <16 x i8> %x, %y
%e1 = extractelement <16 x i1> %cmp, i32 3
%e2 = extractelement <16 x i1> %cmp, i32 8
@@ -3758,6 +4344,15 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SKX-NEXT: testb $-109, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v8i16:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: knotb %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: testb $-109, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%cmp = icmp sgt <8 x i16> %x, %y
%e1 = extractelement <8 x i1> %cmp, i32 0
%e2 = extractelement <8 x i1> %cmp, i32 1
@@ -3819,6 +4414,17 @@ define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SKX-NEXT: xorb %cl, %al
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v4i32:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kshiftrb $3, %k0, %k1
+; AVX10-256-NEXT: kmovd %k1, %ecx
+; AVX10-256-NEXT: kshiftrb $2, %k0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: xorb %cl, %al
+; AVX10-256-NEXT: # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT: retq
%cmp = icmp slt <4 x i32> %x, %y
%e1 = extractelement <4 x i1> %cmp, i32 2
%e2 = extractelement <4 x i1> %cmp, i32 3
@@ -3870,6 +4476,14 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_and_v2i64:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%cmp = icmp ne <2 x i64> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
@@ -3946,6 +4560,14 @@ define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
; SKX-NEXT: testb $14, %al
; SKX-NEXT: setne %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v4f32:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: testb $14, %al
+; AVX10-256-NEXT: setne %al
+; AVX10-256-NEXT: retq
%cmp = fcmp ueq <4 x float> %x, %y
%e1 = extractelement <4 x i1> %cmp, i32 1
%e2 = extractelement <4 x i1> %cmp, i32 2
@@ -3991,6 +4613,14 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_and_v2f64:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: sete %al
+; AVX10-256-NEXT: retq
%cmp = fcmp oge <2 x double> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4031,6 +4661,13 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setne %al
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_or_v2f64:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: setne %al
+; AVX10-256-NEXT: retq
%cmp = fcmp oge <2 x double> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4074,6 +4711,16 @@ define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) {
; SKX-NEXT: andl $15, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v16i8_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $15, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi), %eax
+; AVX10-256-NEXT: retq
%cmp = icmp eq <16 x i8> %x, %y
%val = extractelement <16 x i1> %cmp, i32 %z
ret i1 %val
@@ -4121,6 +4768,16 @@ define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) {
; SKX-NEXT: andl $7, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v8i16_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $7, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,2), %eax
+; AVX10-256-NEXT: retq
%cmp = icmp sgt <8 x i16> %x, %y
%val = extractelement <8 x i1> %cmp, i32 %z
ret i1 %val
@@ -4165,6 +4822,16 @@ define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) {
; SKX-NEXT: andl $3, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v4i32_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $3, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,4), %eax
+; AVX10-256-NEXT: retq
%cmp = icmp slt <4 x i32> %x, %y
%val = extractelement <4 x i1> %cmp, i32 %z
ret i1 %val
@@ -4222,6 +4889,16 @@ define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) {
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v2i64_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $1, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,8), %eax
+; AVX10-256-NEXT: retq
%cmp = icmp ne <2 x i64> %x, %y
%val = extractelement <2 x i1> %cmp, i32 %z
ret i1 %val
@@ -4269,6 +4946,16 @@ define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) {
; SKX-NEXT: andl $3, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v4f32_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
+; AVX10-256-NEXT: vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $3, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,4), %eax
+; AVX10-256-NEXT: retq
%cmp = fcmp ueq <4 x float> %x, %y
%val = extractelement <4 x i1> %cmp, i32 %z
ret i1 %val
@@ -4313,6 +5000,16 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: movmsk_v2f64_var:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT: vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT: andl $1, %edi
+; AVX10-256-NEXT: movzbl -24(%rsp,%rdi,8), %eax
+; AVX10-256-NEXT: retq
%cmp = fcmp oge <2 x double> %x, %y
%val = extractelement <2 x i1> %cmp, i32 %z
ret i1 %val
@@ -4371,6 +5068,18 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
; SKX-NEXT: testb $1, %cl
; SKX-NEXT: cmovel %edx, %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: PR39665_c_ray:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmpltpd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kmovd %k0, %ecx
+; AVX10-256-NEXT: testb $2, %cl
+; AVX10-256-NEXT: movl $42, %eax
+; AVX10-256-NEXT: movl $99, %edx
+; AVX10-256-NEXT: cmovel %edx, %eax
+; AVX10-256-NEXT: testb $1, %cl
+; AVX10-256-NEXT: cmovel %edx, %eax
+; AVX10-256-NEXT: retq
%cmp = fcmp ogt <2 x double> %x, %y
%e1 = extractelement <2 x i1> %cmp, i32 0
%e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4423,6 +5132,16 @@ define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) {
; SKX-NEXT: movl $99, %eax
; SKX-NEXT: cmovel %ecx, %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: PR39665_c_ray_opt:
+; AVX10-256: # %bb.0:
+; AVX10-256-NEXT: vcmpltpd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: cmpb $3, %al
+; AVX10-256-NEXT: movl $42, %ecx
+; AVX10-256-NEXT: movl $99, %eax
+; AVX10-256-NEXT: cmovel %ecx, %eax
+; AVX10-256-NEXT: retq
%cmp = fcmp ogt <2 x double> %x, %y
%shift = shufflevector <2 x i1> %cmp, <2 x i1> poison, <2 x i32> <i32 1, i32 undef>
%1 = and <2 x i1> %cmp, %shift
@@ -4551,6 +5270,25 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
; SKX-NEXT: movw $0, 0
; SKX-NEXT: xorl %eax, %eax
; SKX-NEXT: retq
+;
+; AVX10-256-LABEL: pr67287:
+; AVX10-256: # %bb.0: # %entry
+; AVX10-256-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX10-256-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX10-256-NEXT: vptestnmq %xmm0, %xmm0, %k0
+; AVX10-256-NEXT: kortestb %k0, %k0
+; AVX10-256-NEXT: jne .LBB97_2
+; AVX10-256-NEXT: # %bb.1: # %entry
+; AVX10-256-NEXT: kmovd %k0, %eax
+; AVX10-256-NEXT: testb $1, %al
+; AVX10-256-NEXT: jne .LBB97_2
+; AVX10-256-NEXT: # %bb.3: # %middle.block
+; AVX10-256-NEXT: xorl %eax, %eax
+; AVX10-256-NEXT: retq
+; AVX10-256-NEXT: .LBB97_2:
+; AVX10-256-NEXT: movw $0, 0
+; AVX10-256-NEXT: xorl %eax, %eax
+; AVX10-256-NEXT: retq
entry:
%0 = and <2 x i64> %broadcast.splatinsert25, <i64 4294967295, i64 4294967295>
%1 = icmp eq <2 x i64> %0, zeroinitializer
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index d025fe955be5157..d7183cf47eb13a2 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -939,7 +939,8 @@ std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
}
break;
case 64:
- if (STI.getFeatureBits()[X86::FeatureBWI]) {
+ if (STI.getFeatureBits()[X86::FeatureBWI] &&
+ STI.getFeatureBits()[X86::FeatureEVEX512]) {
ConstantInliner CI(Value);
return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVQkm);
}
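Illustrative only, not part of either patch: a minimal .ll reproducer (hypothetical file repro.ll) for the KMOVQ gating above, assuming an llc built with this series:

  ; llc -mtriple=x86_64 -mattr=+avx10.1-256 repro.ll
  ; Bitcasting a 64-lane compare result to i64 forces a 64-bit mask.
  define i64 @cmp_mask64(<64 x i8> %a, <64 x i8> %b) {
    %cmp = icmp eq <64 x i8> %a, %b
    %mask = bitcast <64 x i1> %cmp to i64
    ret i64 %mask
  }

With -mattr=+avx512bw the mask comes out of one opmask register via kmovq; with -mattr=+avx10.1-256 there is no kmovq, so the i64 result has to be assembled from two 32-bit halves, in the same spirit as the kunpckwd/kshiftlb splitting seen in the movmsk-cmp.ll checks above.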
>From 106ccecee311adfb9d040e74232bf8af580d0c04 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Thu, 23 Nov 2023 20:42:07 -0800
Subject: [PATCH 2/2] fixup! [X86] Improve optmasks handling for AVX10.1-256
---
llvm/lib/Target/X86/X86InstrInfo.cpp | 8 +-
llvm/test/CodeGen/X86/avx512-mask-op.ll | 162 ++------
llvm/test/CodeGen/X86/avx512-vec-cmp.ll | 478 ++++++++----------------
llvm/test/CodeGen/X86/movmsk-cmp.ll | 266 ++++---------
4 files changed, 271 insertions(+), 643 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 9424319cf7dfcb9..75e6891d8972e81 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3526,7 +3526,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// All KMASK RegClasses hold the same k registers, so testing against any one of them suffices.
if (X86::VK16RegClass.contains(SrcReg)) {
if (X86::GR64RegClass.contains(DestReg)) {
- assert(Subtarget.hasBWI() && Subtarget.hasEVEX512());
+ assert(Subtarget.hasBWI() && Subtarget.hasEVEX512() &&
+ "KMOVQ requires BWI with EVEX512");
return X86::KMOVQrk;
}
if (X86::GR32RegClass.contains(DestReg))
@@ -3539,7 +3540,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// All KMASK RegClasses hold the same k registers, so testing against any one of them suffices.
if (X86::VK16RegClass.contains(DestReg)) {
if (X86::GR64RegClass.contains(SrcReg)) {
- assert(Subtarget.hasBWI() && Subtarget.hasEVEX512());
+ assert(Subtarget.hasBWI() && Subtarget.hasEVEX512() &&
+ "KMOVQ requires BWI with EVEX512");
return X86::KMOVQkr;
}
if (X86::GR32RegClass.contains(SrcReg))
@@ -3775,7 +3777,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
if (X86::VK64RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && STI.hasEVEX512() &&
- "KMOVQ requires BWI with 512-bit vectors");
+ "KMOVQ requires BWI with EVEX512");
return Load ? X86::KMOVQkm : X86::KMOVQmk;
}
llvm_unreachable("Unknown 8-byte regclass");
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 99eef49417f33b6..d2246ee2a33885e 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,KNL
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,EVEX512,KNL
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,EVEX512,SKX
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,EVEX512,AVX512BW
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,EVEX512,AVX512DQ
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=CHECK,AVX10-256
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
@@ -4307,37 +4307,13 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
}
define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
-; KNL-LABEL: test_bitcast_v16i1_zext:
-; KNL: ## %bb.0:
-; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: addl %eax, %eax
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_bitcast_v16i1_zext:
-; SKX: ## %bb.0:
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; SKX-NEXT: kmovw %k0, %eax
-; SKX-NEXT: addl %eax, %eax
-; SKX-NEXT: vzeroupper
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_bitcast_v16i1_zext:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
-; AVX512BW-NEXT: addl %eax, %eax
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_bitcast_v16i1_zext:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: addl %eax, %eax
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; EVEX512-LABEL: test_bitcast_v16i1_zext:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT: kmovw %k0, %eax
+; EVEX512-NEXT: addl %eax, %eax
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: test_bitcast_v16i1_zext:
; AVX10-256: ## %bb.0:
@@ -4611,69 +4587,21 @@ declare void @foo()
; Make sure we can use the ZF/CF flags from kortest to check for all ones.
define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
-; KNL-LABEL: ktest_allones:
-; KNL: ## %bb.0:
-; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: je LBB67_2
-; KNL-NEXT: ## %bb.1: ## %bb.1
-; KNL-NEXT: pushq %rax
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: callq _foo
-; KNL-NEXT: addq $8, %rsp
-; KNL-NEXT: LBB67_2: ## %bb.2
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
-;
-; SKX-LABEL: ktest_allones:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; SKX-NEXT: kortestw %k0, %k0
-; SKX-NEXT: je LBB67_2
-; SKX-NEXT: ## %bb.1: ## %bb.1
-; SKX-NEXT: pushq %rax
-; SKX-NEXT: .cfi_def_cfa_offset 16
-; SKX-NEXT: vzeroupper
-; SKX-NEXT: callq _foo
-; SKX-NEXT: addq $8, %rsp
-; SKX-NEXT: LBB67_2: ## %bb.2
-; SKX-NEXT: vzeroupper
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: ktest_allones:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kortestw %k0, %k0
-; AVX512BW-NEXT: je LBB67_2
-; AVX512BW-NEXT: ## %bb.1: ## %bb.1
-; AVX512BW-NEXT: pushq %rax
-; AVX512BW-NEXT: .cfi_def_cfa_offset 16
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: callq _foo
-; AVX512BW-NEXT: addq $8, %rsp
-; AVX512BW-NEXT: LBB67_2: ## %bb.2
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: ktest_allones:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512DQ-NEXT: kortestw %k0, %k0
-; AVX512DQ-NEXT: je LBB67_2
-; AVX512DQ-NEXT: ## %bb.1: ## %bb.1
-; AVX512DQ-NEXT: pushq %rax
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: callq _foo
-; AVX512DQ-NEXT: addq $8, %rsp
-; AVX512DQ-NEXT: LBB67_2: ## %bb.2
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; EVEX512-LABEL: ktest_allones:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpord %zmm1, %zmm0, %zmm0
+; EVEX512-NEXT: vptestmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: je LBB67_2
+; EVEX512-NEXT: ## %bb.1: ## %bb.1
+; EVEX512-NEXT: pushq %rax
+; EVEX512-NEXT: .cfi_def_cfa_offset 16
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: callq _foo
+; EVEX512-NEXT: addq $8, %rsp
+; EVEX512-NEXT: LBB67_2: ## %bb.2
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: ktest_allones:
; AVX10-256: ## %bb.0:
@@ -4895,37 +4823,13 @@ entry:
; Make sure we bring the -1 constant into the mask domain.
define void @mask_not_cast(ptr, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
-; KNL-LABEL: mask_not_cast:
-; KNL: ## %bb.0:
-; KNL-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
-; KNL-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
-; KNL-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
-;
-; SKX-LABEL: mask_not_cast:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
-; SKX-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
-; SKX-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
-; SKX-NEXT: vzeroupper
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: mask_not_cast:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
-; AVX512BW-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: mask_not_cast:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
-; AVX512DQ-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; EVEX512-LABEL: mask_not_cast:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnleud %zmm3, %zmm2, %k1
+; EVEX512-NEXT: vptestmd %zmm0, %zmm1, %k1 {%k1}
+; EVEX512-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: mask_not_cast:
; AVX10-256: ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 0736a559987e673..a97426f7a209041 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1,21 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,KNL
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=EVEX512,AVX512,KNL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=EVEX512,AVX512,AVX512BW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=EVEX512,SKX
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10-256
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
-; AVX512-LABEL: test1:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
-; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test1:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
-; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test1:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test1:
; AVX10-256: ## %bb.0:
@@ -30,17 +24,11 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
}
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
-; AVX512-LABEL: test2:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
-; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test2:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
-; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test2:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test2:
; AVX10-256: ## %bb.0:
@@ -55,17 +43,11 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
}
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
-; AVX512-LABEL: test3:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
-; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test3:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
-; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test3:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test3:
; AVX10-256: ## %bb.0:
@@ -81,17 +63,11 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
}
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; AVX512-LABEL: test4_unsigned:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
-; AVX512-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test4_unsigned:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
-; SKX-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test4_unsigned:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
+; EVEX512-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test4_unsigned:
; AVX10-256: ## %bb.0:
@@ -106,17 +82,11 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1)
}
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
-; AVX512-LABEL: test5:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
-; AVX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test5:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
-; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test5:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test5:
; AVX10-256: ## %bb.0:
@@ -131,17 +101,11 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
}
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
-; AVX512-LABEL: test6_unsigned:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
-; AVX512-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test6_unsigned:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
-; SKX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test6_unsigned:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
+; EVEX512-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test6_unsigned:
; AVX10-256: ## %bb.0:
@@ -488,19 +452,12 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
}
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
-; AVX512-LABEL: test14:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
-; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
-; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test14:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
-; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
-; SKX-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test14:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
+; EVEX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
+; EVEX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test14:
; AVX10-256: ## %bb.0:
@@ -520,19 +477,12 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
}
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
-; AVX512-LABEL: test15:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
-; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test15:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
-; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test15:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
+; EVEX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
+; EVEX512-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test15:
; AVX10-256: ## %bb.0:
@@ -552,17 +502,11 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
}
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; AVX512-LABEL: test16:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
-; AVX512-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test16:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
-; SKX-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test16:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
+; EVEX512-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test16:
; AVX10-256: ## %bb.0:
@@ -577,17 +521,11 @@ define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind
}
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; AVX512-LABEL: test17:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
-; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test17:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
-; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test17:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test17:
; AVX10-256: ## %bb.0:
@@ -603,17 +541,11 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; AVX512-LABEL: test18:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
-; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test18:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
-; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test18:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test18:
; AVX10-256: ## %bb.0:
@@ -629,17 +561,11 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; AVX512-LABEL: test19:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
-; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test19:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
-; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test19:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test19:
; AVX10-256: ## %bb.0:
@@ -655,19 +581,12 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
}
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; AVX512-LABEL: test20:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
-; AVX512-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
-; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test20:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
-; SKX-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
-; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test20:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
+; EVEX512-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test20:
; AVX10-256: ## %bb.0:
@@ -686,19 +605,12 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
}
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; AVX512-LABEL: test21:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
-; AVX512-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
-; AVX512-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test21:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
-; SKX-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
-; SKX-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test21:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
+; EVEX512-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test21:
; AVX10-256: ## %bb.0:
@@ -717,19 +629,12 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
}
define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; AVX512-LABEL: test22:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
-; AVX512-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
-; AVX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test22:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
-; SKX-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
-; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test22:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
+; EVEX512-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test22:
; AVX10-256: ## %bb.0:
@@ -749,19 +654,12 @@ define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1)
}
define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; AVX512-LABEL: test23:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; AVX512-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
-; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test23:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; SKX-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
-; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test23:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test23:
; AVX10-256: ## %bb.0:
@@ -781,17 +679,11 @@ define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32>
}
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
-; AVX512-LABEL: test24:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
-; AVX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test24:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
-; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test24:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test24:
; AVX10-256: ## %bb.0:
@@ -810,17 +702,11 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
}
define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
-; AVX512-LABEL: test25:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
-; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test25:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
-; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test25:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test25:
; AVX10-256: ## %bb.0:
@@ -839,19 +725,12 @@ define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
}
define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; AVX512-LABEL: test26:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; AVX512-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
-; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test26:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; SKX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
-; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test26:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
+; EVEX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test26:
; AVX10-256: ## %bb.0:
@@ -874,19 +753,12 @@ define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32>
}
define <8 x i64> @test27(<8 x i64> %x, ptr %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; AVX512-LABEL: test27:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
-; AVX512-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
-; AVX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test27:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
-; SKX-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
-; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test27:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
+; EVEX512-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test27:
; AVX10-256: ## %bb.0:
@@ -1141,17 +1013,11 @@ define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, ptr %yp)
}
define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
-; AVX512-LABEL: test33:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
-; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test33:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
-; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test33:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test33:
; AVX10-256: ## %bb.0:
@@ -1167,17 +1033,11 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind
}
define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
-; AVX512-LABEL: test33_commute:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
-; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test33_commute:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
-; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test33_commute:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test33_commute:
; AVX10-256: ## %bb.0:
@@ -1309,17 +1169,11 @@ define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, ptr %yp) nou
}
define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
-; AVX512-LABEL: test36:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
-; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test36:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
-; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test36:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test36:
; AVX10-256: ## %bb.0:
@@ -1335,17 +1189,11 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind
}
define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
-; AVX512-LABEL: test36_commute:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
-; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test36_commute:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
-; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test36_commute:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test36_commute:
; AVX10-256: ## %bb.0:
@@ -1361,17 +1209,11 @@ define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp)
}
define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
-; AVX512-LABEL: test37:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
-; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test37:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
-; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test37:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test37:
; AVX10-256: ## %bb.0:
@@ -1392,17 +1234,11 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwin
}
define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
-; AVX512-LABEL: test37_commute:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
-; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test37_commute:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
-; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test37_commute:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
+; EVEX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test37_commute:
; AVX10-256: ## %bb.0:
@@ -1550,17 +1386,11 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, ptr %ptr)
define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, ptr %ptr) nounwind {
-; AVX512-LABEL: test40:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
-; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test40:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
-; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test40:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test40:
; AVX10-256: ## %bb.0:
@@ -1581,17 +1411,11 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, ptr %ptr) noun
}
define <16 x float> @test40_commute(<16 x float> %x, <16 x float> %x1, ptr %ptr) nounwind {
-; AVX512-LABEL: test40_commute:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
-; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; AVX512-NEXT: retq ## encoding: [0xc3]
-;
-; SKX-LABEL: test40_commute:
-; SKX: ## %bb.0:
-; SKX-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
-; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; SKX-NEXT: retq ## encoding: [0xc3]
+; EVEX512-LABEL: test40_commute:
+; EVEX512: ## %bb.0:
+; EVEX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
+; EVEX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT: retq ## encoding: [0xc3]
;
; AVX10-256-LABEL: test40_commute:
; AVX10-256: ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index ecd833bbe6e5aaa..53f99431598bf13 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -3,9 +3,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX,AVX10-256
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,EVEX512,KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,EVEX512,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX,AVX512,AVX10-256
define i1 @allones_v16i8_sign(<16 x i8> %arg) {
; SSE-LABEL: allones_v16i8_sign:
@@ -81,14 +81,6 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allones_v32i8_sign:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpmovmskb %ymm0, %eax
-; AVX10-256-NEXT: cmpl $-1, %eax
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = icmp slt <32 x i8> %arg, zeroinitializer
%tmp1 = bitcast <32 x i1> %tmp to i32
%tmp2 = icmp eq i32 %tmp1, -1
@@ -129,14 +121,6 @@ define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v32i8_sign:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpmovmskb %ymm0, %eax
-; AVX10-256-NEXT: testl %eax, %eax
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = icmp slt <32 x i8> %arg, zeroinitializer
%tmp1 = bitcast <32 x i1> %tmp to i32
%tmp2 = icmp eq i32 %tmp1, 0
@@ -703,14 +687,6 @@ define i1 @allones_v8i32_sign(<8 x i32> %arg) {
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allones_v8i32_sign:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX10-256-NEXT: vtestps %ymm1, %ymm0
-; AVX10-256-NEXT: setb %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = icmp slt <8 x i32> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, -1
@@ -907,14 +883,6 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) {
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allones_v4i64_sign:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX10-256-NEXT: vtestpd %ymm1, %ymm0
-; AVX10-256-NEXT: setb %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = icmp slt <4 x i64> %arg, zeroinitializer
%tmp1 = bitcast <4 x i1> %tmp to i4
%tmp2 = icmp eq i4 %tmp1, -1
@@ -1242,13 +1210,13 @@ define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v8i64_not:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v8i64_not:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: setne %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v8i64_not:
; AVX10-256: # %bb.0:
@@ -1410,14 +1378,6 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v32i8_and1:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
-; AVX10-256-NEXT: vptest %ymm1, %ymm0
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1534,13 +1494,13 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v64i8_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v64i8_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v64i8_and1:
; AVX10-256: # %bb.0:
@@ -1843,13 +1803,13 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v32i16_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v32i16_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v32i16_and1:
; AVX10-256: # %bb.0:
@@ -1905,14 +1865,6 @@ define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v16i16_and1:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
-; AVX10-256-NEXT: vptest %ymm1, %ymm0
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2119,14 +2071,6 @@ define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v8i32_and1:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
-; AVX10-256-NEXT: vptest %ymm1, %ymm0
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2179,13 +2123,13 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allones_v16i32_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allones_v16i32_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: setb %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allones_v16i32_and1:
; AVX10-256: # %bb.0:
@@ -2242,13 +2186,13 @@ define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v16i32_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v16i32_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v16i32_and1:
; AVX10-256: # %bb.0:
@@ -2466,14 +2410,6 @@ define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v4i64_and1:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
-; AVX10-256-NEXT: vptest %ymm1, %ymm0
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -2598,14 +2534,14 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v8i64_and1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
-; AVX512-NEXT: vptestmd %zmm1, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v8i64_and1:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
+; EVEX512-NEXT: vptestmd %zmm1, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v8i64_and1:
; AVX10-256: # %bb.0:
@@ -2814,14 +2750,6 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v32i8_and4:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
-; AVX10-256-NEXT: vptest %ymm1, %ymm0
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2938,13 +2866,13 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v64i8_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v64i8_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v64i8_and4:
; AVX10-256: # %bb.0:
@@ -3247,13 +3175,13 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v32i16_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v32i16_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v32i16_and4:
; AVX10-256: # %bb.0:
@@ -3309,14 +3237,6 @@ define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v16i16_and4:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
-; AVX10-256-NEXT: vptest %ymm1, %ymm0
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -3523,14 +3443,6 @@ define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v8i32_and4:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
-; AVX10-256-NEXT: vptest %ymm1, %ymm0
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3583,13 +3495,13 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allones_v16i32_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allones_v16i32_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: setb %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allones_v16i32_and4:
; AVX10-256: # %bb.0:
@@ -3646,13 +3558,13 @@ define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v16i32_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v16i32_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v16i32_and4:
; AVX10-256: # %bb.0:
@@ -3870,14 +3782,6 @@ define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: allzeros_v4i64_and4:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
-; AVX10-256-NEXT: vptest %ymm1, %ymm0
-; AVX10-256-NEXT: sete %al
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -4002,14 +3906,14 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: allzeros_v8i64_and4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4]
-; AVX512-NEXT: vptestmd %zmm1, %zmm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; EVEX512-LABEL: allzeros_v8i64_and4:
+; EVEX512: # %bb.0:
+; EVEX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4]
+; EVEX512-NEXT: vptestmd %zmm1, %zmm0, %k0
+; EVEX512-NEXT: kortestw %k0, %k0
+; EVEX512-NEXT: sete %al
+; EVEX512-NEXT: vzeroupper
+; EVEX512-NEXT: retq
;
; AVX10-256-LABEL: allzeros_v8i64_and4:
; AVX10-256: # %bb.0:
@@ -4205,12 +4109,6 @@ define i32 @movmskb256(<32 x i8> %x) {
; AVX512-NEXT: vpmovmskb %ymm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
-;
-; AVX10-256-LABEL: movmskb256:
-; AVX10-256: # %bb.0:
-; AVX10-256-NEXT: vpmovmskb %ymm0, %eax
-; AVX10-256-NEXT: vzeroupper
-; AVX10-256-NEXT: retq
%a = icmp slt <32 x i8> %x, zeroinitializer
%b = bitcast <32 x i1> %a to i32
ret i32 %b