[llvm] [X86] Fix opmasks handling for AVX10.1-256 (PR #73074)

Evgenii Kudriashov via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 15:08:29 PST 2023


https://github.com/e-kud updated https://github.com/llvm/llvm-project/pull/73074

>From 44757e28781ee7ff48fe1d9045b90665183ad2a0 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 7 Nov 2023 18:27:23 -0800
Subject: [PATCH] [X86] Improve opmasks handling for AVX10.1-256

Quadword opmask instructions are only supported on processors that
support 512-bit vector lengths, so gate them on the EVEX512 feature.
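
The recurring pattern across the patch is to require EVEX512 alongside
AVX512BW before emitting any quadword mask instruction (KMOVQ,
KSHIFT*Q, KADDQ, KNOTQ, KORQ, KANDQ, KANDNQ, KXORQ). As a minimal
illustration, here is the check restated as a standalone helper
(written for this message, not a verbatim excerpt; the patch applies
the same condition inline, e.g. in buildClearRegister):

    // Quadword mask operations need both AVX512BW and 512-bit EVEX
    // support; without EVEX512 (e.g. on AVX10.1-256) fall back to the
    // word-sized form.
    static unsigned pickMaskClearOpcode(const X86Subtarget &ST) {
      return ST.hasBWI() && ST.hasEVEX512() ? X86::KXORQrr
                                            : X86::KXORWrr;
    }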
---
 llvm/lib/Target/X86/X86DomainReassignment.cpp |  35 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   5 +-
 llvm/lib/Target/X86/X86InstrInfo.cpp          |  16 +-
 llvm/lib/Target/X86/X86Subtarget.h            |   3 +-
 llvm/test/CodeGen/X86/avx512-mask-op.ll       | 772 ++++++++++++++-
 llvm/test/CodeGen/X86/avx512-vec-cmp.ll       | 899 +++++++++++++++---
 llvm/test/CodeGen/X86/avx512bw-mask-op.ll     | 105 +-
 llvm/test/CodeGen/X86/kshift.ll               | 301 ++++++
 llvm/test/CodeGen/X86/movmsk-cmp.ll           | 798 ++++++++++++++--
 llvm/tools/llvm-exegesis/lib/X86/Target.cpp   |   3 +-
 10 files changed, 2614 insertions(+), 323 deletions(-)
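
The 512-bit widening heuristic is adjusted in the same spirit; restated
here as a free function for illustration (the patch keeps it as a
member of X86Subtarget):

    // canExtendTo512DQ() after this patch: widening to 512-bit
    // operations additionally requires EVEX512 support.
    static bool canExtendTo512DQ(const X86Subtarget &ST) {
      return ST.hasAVX512() &&
             (!ST.hasVLX() || ST.getPreferVectorWidth() >= 512) &&
             ST.hasEVEX512();
    }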

diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index bdd86e48fa5438..70f3be0e12ece8 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -662,37 +662,30 @@ void X86DomainReassignment::initConverters() {
 
   if (STI->hasBWI()) {
     createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
-    createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
-
     createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
-    createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
-
     createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
-    createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
-
     createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
-    createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
-
     createReplacer(X86::SHL32ri, X86::KSHIFTLDri);
-    createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
-
     createReplacer(X86::ADD32rr, X86::KADDDrr);
-    createReplacer(X86::ADD64rr, X86::KADDQrr);
-
     createReplacer(X86::NOT32r, X86::KNOTDrr);
-    createReplacer(X86::NOT64r, X86::KNOTQrr);
-
     createReplacer(X86::OR32rr, X86::KORDrr);
-    createReplacer(X86::OR64rr, X86::KORQrr);
-
     createReplacer(X86::AND32rr, X86::KANDDrr);
-    createReplacer(X86::AND64rr, X86::KANDQrr);
-
     createReplacer(X86::ANDN32rr, X86::KANDNDrr);
-    createReplacer(X86::ANDN64rr, X86::KANDNQrr);
-
     createReplacer(X86::XOR32rr, X86::KXORDrr);
-    createReplacer(X86::XOR64rr, X86::KXORQrr);
+
+    if (STI->hasEVEX512()) {
+      createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
+      createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
+      createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
+      createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
+      createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
+      createReplacer(X86::ADD64rr, X86::KADDQrr);
+      createReplacer(X86::NOT64r, X86::KNOTQrr);
+      createReplacer(X86::OR64rr, X86::KORQrr);
+      createReplacer(X86::AND64rr, X86::KANDQrr);
+      createReplacer(X86::ANDN64rr, X86::KANDNQrr);
+      createReplacer(X86::XOR64rr, X86::KXORQrr);
+    }
 
     // TODO: KTEST is not a replacement for TEST due to flag differences. Need
     // to prove only Z flag is used.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4fca5afb46dd2b..0c2eaa7cf5bf51 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2058,9 +2058,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // AVX512BW..
   if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
-    addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
+    if (Subtarget.hasEVEX512())
+      addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
 
     for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
+      if (VT == MVT::v64i1 && !Subtarget.hasEVEX512())
+        continue;
       setOperationAction(ISD::VSELECT,            VT, Expand);
       setOperationAction(ISD::TRUNCATE,           VT, Custom);
       setOperationAction(ISD::SETCC,              VT, Custom);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ea3bf1f101c1e0..254ffe7818c9bb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3996,7 +3996,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
   // anyone.
   if (X86::VK16RegClass.contains(SrcReg)) {
     if (X86::GR64RegClass.contains(DestReg)) {
-      assert(Subtarget.hasBWI());
+      assert(Subtarget.hasBWI() && Subtarget.hasEVEX512() &&
+             "KMOVQ requires BWI with EVEX512");
       return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
     }
     if (X86::GR32RegClass.contains(DestReg))
@@ -4011,7 +4012,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
   // anyone.
   if (X86::VK16RegClass.contains(DestReg)) {
     if (X86::GR64RegClass.contains(SrcReg)) {
-      assert(Subtarget.hasBWI());
+      assert(Subtarget.hasBWI() && Subtarget.hasEVEX512() &&
+             "KMOVQ requires BWI with EVEX512");
       return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
     }
     if (X86::GR32RegClass.contains(SrcReg))
@@ -4125,8 +4127,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // All KMASK RegClasses hold the same k registers, can be tested against
   // anyone.
   else if (X86::VK16RegClass.contains(DestReg, SrcReg))
-    Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
-                             : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
+    Opc = Subtarget.hasBWI() && Subtarget.hasEVEX512()
+              ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
+              : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
   if (!Opc)
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
 
@@ -4247,7 +4250,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
     if (X86::RFP64RegClass.hasSubClassEq(RC))
       return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
     if (X86::VK64RegClass.hasSubClassEq(RC)) {
-      assert(STI.hasBWI() && "KMOVQ requires BWI");
+      assert(STI.hasBWI() && STI.hasEVEX512() &&
+             "KMOVQ requires BWI with EVEX512");
       return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
                   : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
     }
@@ -10523,7 +10527,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
       return;
 
     // KXOR is safe to use because it doesn't affect flags.
-    unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
+    unsigned Op = ST.hasBWI() && ST.hasEVEX512() ? X86::KXORQrr : X86::KXORWrr;
     BuildMI(MBB, Iter, DL, get(Op), Reg)
         .addReg(Reg, RegState::Undef)
         .addReg(Reg, RegState::Undef);
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index a458b5f9ec8fbb..47d24f4be58a3e 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -244,7 +244,8 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   // TODO: Currently we're always allowing widening on CPUs without VLX,
   // because for many cases we don't have a better option.
   bool canExtendTo512DQ() const {
-    return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
+    return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512) &&
+           hasEVEX512();
   }
   bool canExtendTo512BW() const  {
     return hasBWI() && canExtendTo512DQ();
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 9e689341f7b88e..d2246ee2a33885 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,KNL
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw  | FileCheck %s --check-prefixes=CHECK,AVX512BW
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq  | FileCheck %s --check-prefixes=CHECK,AVX512DQ
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,EVEX512,KNL
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,EVEX512,SKX
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw  | FileCheck %s --check-prefixes=CHECK,EVEX512,AVX512BW
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq  | FileCheck %s --check-prefixes=CHECK,EVEX512,AVX512DQ
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=CHECK,AVX10-256
 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
 
 
@@ -131,6 +132,13 @@ define void @mask8_mem(ptr %ptr) {
 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: mask8_mem:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovb (%rdi), %k0
+; AVX10-256-NEXT:    knotb %k0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: mask8_mem:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -205,6 +213,15 @@ define i16 @mand16_mem(ptr %x, ptr %y) {
 ; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: mand16_mem:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovw (%rdi), %k0
+; AVX10-256-NEXT:    kmovw (%rsi), %k1
+; AVX10-256-NEXT:    korw %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: mand16_mem:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -257,6 +274,14 @@ define i8 @shuf_test1(i16 %v) nounwind {
 ; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: shuf_test1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd %edi, %k0
+; AVX10-256-NEXT:    kshiftrw $8, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: shuf_test1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
@@ -304,6 +329,15 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: zext_test1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    andl $1, %eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: zext_test1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
@@ -359,6 +393,16 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: zext_test2:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    andl $1, %eax
+; AVX10-256-NEXT:    ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: zext_test2:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
@@ -415,6 +459,16 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: zext_test3:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpnleud %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    kshiftrb $5, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    andb $1, %al
+; AVX10-256-NEXT:    ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: zext_test3:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
@@ -506,6 +560,14 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test4:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1
+; AVX10-256-NEXT:    vpcmpleq %ymm1, %ymm0, %k0 {%k1}
+; AVX10-256-NEXT:    vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test4:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1
@@ -567,6 +629,13 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test5:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
+; AVX10-256-NEXT:    vpcmpleq %xmm3, %xmm2, %k0 {%k1}
+; AVX10-256-NEXT:    vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test5:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
@@ -645,6 +714,14 @@ define void @test7(<8 x i1> %mask)  {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test7:
+; AVX10-256:       ## %bb.0: ## %allocas
+; AVX10-256-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    orb $85, %al
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test7:
 ; X86:       ## %bb.0: ## %allocas
 ; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
@@ -732,6 +809,24 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test8:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    cmpl %esi, %edi
+; AVX10-256-NEXT:    jg LBB17_1
+; AVX10-256-NEXT:  ## %bb.2:
+; AVX10-256-NEXT:    kxorw %k0, %k0, %k0
+; AVX10-256-NEXT:    vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB17_1:
+; AVX10-256-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX10-256-NEXT:    vpcmpgtd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    vpcmpgtd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test8:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -821,6 +916,20 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test9:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    cmpl %esi, %edi
+; AVX10-256-NEXT:    jg LBB18_1
+; AVX10-256-NEXT:  ## %bb.2:
+; AVX10-256-NEXT:    vpsllw $7, %xmm1, %xmm0
+; AVX10-256-NEXT:    jmp LBB18_3
+; AVX10-256-NEXT:  LBB18_1:
+; AVX10-256-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT:  LBB18_3:
+; AVX10-256-NEXT:    vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT:    vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test9:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -907,6 +1016,20 @@ define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test10:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    cmpl %esi, %edi
+; AVX10-256-NEXT:    jg LBB19_1
+; AVX10-256-NEXT:  ## %bb.2:
+; AVX10-256-NEXT:    vpsllw $15, %xmm1, %xmm0
+; AVX10-256-NEXT:    jmp LBB19_3
+; AVX10-256-NEXT:  LBB19_1:
+; AVX10-256-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT:  LBB19_3:
+; AVX10-256-NEXT:    vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT:    vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test10:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -989,6 +1112,20 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test11:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    cmpl %esi, %edi
+; AVX10-256-NEXT:    jg LBB20_1
+; AVX10-256-NEXT:  ## %bb.2:
+; AVX10-256-NEXT:    vpslld $31, %xmm1, %xmm0
+; AVX10-256-NEXT:    jmp LBB20_3
+; AVX10-256-NEXT:  LBB20_1:
+; AVX10-256-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT:  LBB20_3:
+; AVX10-256-NEXT:    vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT:    vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test11:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1120,6 +1257,16 @@ define <16 x i1> @test15(i32 %x, i32 %y)  {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test15:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    cmpl %esi, %edi
+; AVX10-256-NEXT:    movl $21845, %eax ## imm = 0x5555
+; AVX10-256-NEXT:    movl $1, %ecx
+; AVX10-256-NEXT:    cmovgl %eax, %ecx
+; AVX10-256-NEXT:    kmovd %ecx, %k0
+; AVX10-256-NEXT:    vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test15:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1231,6 +1378,23 @@ define <64 x i8> @test16(i64 %x) {
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test16:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd %edi, %k0
+; AVX10-256-NEXT:    shrq $32, %rdi
+; AVX10-256-NEXT:    kmovd %edi, %k1
+; AVX10-256-NEXT:    movl $-33, %eax
+; AVX10-256-NEXT:    kmovd %eax, %k2
+; AVX10-256-NEXT:    kandd %k2, %k0, %k0
+; AVX10-256-NEXT:    movb $1, %al
+; AVX10-256-NEXT:    kmovd %eax, %k2
+; AVX10-256-NEXT:    kshiftld $31, %k2, %k2
+; AVX10-256-NEXT:    kshiftrd $26, %k2, %k2
+; AVX10-256-NEXT:    kord %k2, %k0, %k0
+; AVX10-256-NEXT:    vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT:    vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test16:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0
@@ -1350,6 +1514,24 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test17:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd %edi, %k0
+; AVX10-256-NEXT:    shrq $32, %rdi
+; AVX10-256-NEXT:    kmovd %edi, %k1
+; AVX10-256-NEXT:    cmpl %edx, %esi
+; AVX10-256-NEXT:    setg %al
+; AVX10-256-NEXT:    movl $-33, %ecx
+; AVX10-256-NEXT:    kmovd %ecx, %k2
+; AVX10-256-NEXT:    kandd %k2, %k0, %k0
+; AVX10-256-NEXT:    kmovd %eax, %k2
+; AVX10-256-NEXT:    kshiftld $31, %k2, %k2
+; AVX10-256-NEXT:    kshiftrd $26, %k2, %k2
+; AVX10-256-NEXT:    kord %k2, %k0, %k0
+; AVX10-256-NEXT:    vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT:    vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test17:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0
@@ -1455,6 +1637,24 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test18:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd %edi, %k0
+; AVX10-256-NEXT:    kmovd %esi, %k1
+; AVX10-256-NEXT:    kshiftrw $8, %k1, %k2
+; AVX10-256-NEXT:    kshiftrw $9, %k1, %k1
+; AVX10-256-NEXT:    movb $-65, %al
+; AVX10-256-NEXT:    kmovd %eax, %k3
+; AVX10-256-NEXT:    kandb %k3, %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $6, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT:    kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $7, %k2, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test18:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
@@ -1521,6 +1721,15 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
 ; AVX512DQ-NEXT:    vpandq %zmm0, %zmm1, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test21:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $7, %ymm2, %ymm2
+; AVX10-256-NEXT:    vpmovb2m %ymm2, %k1
+; AVX10-256-NEXT:    vmovdqu16 %ymm0, %ymm0 {%k1} {z}
+; AVX10-256-NEXT:    kshiftrd $16, %k1, %k1
+; AVX10-256-NEXT:    vmovdqu16 %ymm1, %ymm1 {%k1} {z}
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test21:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllw $7, %ymm1, %ymm1
@@ -1571,6 +1780,13 @@ define void @test22(<4 x i1> %a, ptr %addr) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test22:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test22:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpslld $31, %xmm0, %xmm0
@@ -1622,6 +1838,13 @@ define void @test23(<2 x i1> %a, ptr %addr) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test23:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovq2m %xmm0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test23:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
@@ -1672,6 +1895,15 @@ define void @store_v1i1(<1 x i1> %c , ptr %ptr) {
 ; AVX512DQ-NEXT:    kmovb %k0, (%rsi)
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_v1i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd %edi, %k0
+; AVX10-256-NEXT:    knotw %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $7, %k0, %k0
+; AVX10-256-NEXT:    kshiftrb $7, %k0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rsi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_v1i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
@@ -1730,6 +1962,16 @@ define void @store_v2i1(<2 x i1> %c , ptr %ptr) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_v2i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovq2m %xmm0, %k0
+; AVX10-256-NEXT:    knotw %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $6, %k0, %k0
+; AVX10-256-NEXT:    kshiftrb $6, %k0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_v2i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
@@ -1789,6 +2031,16 @@ define void @store_v4i1(<4 x i1> %c , ptr %ptr) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_v4i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovd2m %xmm0, %k0
+; AVX10-256-NEXT:    knotw %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $4, %k0, %k0
+; AVX10-256-NEXT:    kshiftrb $4, %k0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_v4i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpslld $31, %xmm0, %xmm0
@@ -1843,6 +2095,14 @@ define void @store_v8i1(<8 x i1> %c , ptr %ptr) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_v8i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT:    knotb %k0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_v8i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
@@ -1893,6 +2153,14 @@ define void @store_v16i1(<16 x i1> %c , ptr %ptr) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_v16i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT:    knotw %k0, %k0
+; AVX10-256-NEXT:    kmovw %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_v16i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
@@ -2008,6 +2276,12 @@ define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
 ; AVX512DQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test_build_vec_v32i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test_build_vec_v32i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
@@ -2041,6 +2315,12 @@ define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
 ; AVX512DQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test_build_vec_v32i1_optsize:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test_build_vec_v32i1_optsize:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
@@ -2076,6 +2356,12 @@ define <32 x i16> @test_build_vec_v32i1_pgso(<32 x i16> %x) !prof !14 {
 ; AVX512DQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test_build_vec_v32i1_pgso:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test_build_vec_v32i1_pgso:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
@@ -2107,6 +2393,12 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
 ; AVX512DQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test_build_vec_v64i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX10-256-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test_build_vec_v64i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
@@ -2182,6 +2474,31 @@ define void @ktest_1(<8 x double> %in, ptr %base) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ktest_1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtpd (%rdi), %ymm0, %k1
+; AVX10-256-NEXT:    vcmpgtpd 32(%rdi), %ymm1, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k0
+; AVX10-256-NEXT:    korb %k0, %k1, %k0
+; AVX10-256-NEXT:    vmovupd 40(%rdi), %ymm2 {%k2} {z}
+; AVX10-256-NEXT:    vmovupd 8(%rdi), %ymm3 {%k1} {z}
+; AVX10-256-NEXT:    vcmpltpd %ymm3, %ymm0, %k1
+; AVX10-256-NEXT:    vcmpltpd %ymm2, %ymm1, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    ktestb %k1, %k0
+; AVX10-256-NEXT:    je LBB44_2
+; AVX10-256-NEXT:  ## %bb.1: ## %L1
+; AVX10-256-NEXT:    vmovapd %ymm0, (%rdi)
+; AVX10-256-NEXT:    vmovapd %ymm1, 32(%rdi)
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB44_2: ## %L2
+; AVX10-256-NEXT:    vmovapd %ymm0, 8(%rdi)
+; AVX10-256-NEXT:    vmovapd %ymm1, 40(%rdi)
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ktest_1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2316,6 +2633,43 @@ define void @ktest_2(<32 x float> %in, ptr %base) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ktest_2:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtps (%rdi), %ymm0, %k1
+; AVX10-256-NEXT:    vcmpgtps 32(%rdi), %ymm1, %k2
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k0
+; AVX10-256-NEXT:    vcmpgtps 64(%rdi), %ymm2, %k3
+; AVX10-256-NEXT:    vcmpgtps 96(%rdi), %ymm3, %k4
+; AVX10-256-NEXT:    kunpckbw %k3, %k4, %k5
+; AVX10-256-NEXT:    kunpckwd %k0, %k5, %k0
+; AVX10-256-NEXT:    vmovups 100(%rdi), %ymm4 {%k4} {z}
+; AVX10-256-NEXT:    vmovups 68(%rdi), %ymm5 {%k3} {z}
+; AVX10-256-NEXT:    vmovups 36(%rdi), %ymm6 {%k2} {z}
+; AVX10-256-NEXT:    vmovups 4(%rdi), %ymm7 {%k1} {z}
+; AVX10-256-NEXT:    vcmpltps %ymm7, %ymm0, %k1
+; AVX10-256-NEXT:    vcmpltps %ymm6, %ymm1, %k2
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT:    vcmpltps %ymm5, %ymm2, %k2
+; AVX10-256-NEXT:    vcmpltps %ymm4, %ymm3, %k3
+; AVX10-256-NEXT:    kunpckbw %k2, %k3, %k2
+; AVX10-256-NEXT:    kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT:    kortestd %k1, %k0
+; AVX10-256-NEXT:    je LBB45_2
+; AVX10-256-NEXT:  ## %bb.1: ## %L1
+; AVX10-256-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX10-256-NEXT:    vmovaps %ymm1, 32(%rdi)
+; AVX10-256-NEXT:    vmovaps %ymm2, 64(%rdi)
+; AVX10-256-NEXT:    vmovaps %ymm3, 96(%rdi)
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB45_2: ## %L2
+; AVX10-256-NEXT:    vmovaps %ymm0, 4(%rdi)
+; AVX10-256-NEXT:    vmovaps %ymm1, 36(%rdi)
+; AVX10-256-NEXT:    vmovaps %ymm2, 68(%rdi)
+; AVX10-256-NEXT:    vmovaps %ymm3, 100(%rdi)
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ktest_2:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2390,6 +2744,14 @@ define <8 x i64> @load_8i1(ptr %a) {
 ; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: load_8i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovb (%rdi), %k0
+; AVX10-256-NEXT:    vpmovm2q %k0, %ymm0
+; AVX10-256-NEXT:    kshiftrb $4, %k0, %k0
+; AVX10-256-NEXT:    vpmovm2q %k0, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: load_8i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2426,6 +2788,14 @@ define <16 x i32> @load_16i1(ptr %a) {
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: load_16i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovb (%rdi), %k0
+; AVX10-256-NEXT:    kmovb 1(%rdi), %k1
+; AVX10-256-NEXT:    vpmovm2d %k0, %ymm0
+; AVX10-256-NEXT:    vpmovm2d %k1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: load_16i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2472,6 +2842,12 @@ define <2 x i16> @load_2i1(ptr %a) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: load_2i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovb (%rdi), %k0
+; AVX10-256-NEXT:    vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: load_2i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2518,6 +2894,12 @@ define <4 x i16> @load_4i1(ptr %a) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: load_4i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovb (%rdi), %k0
+; AVX10-256-NEXT:    vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: load_4i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2564,6 +2946,14 @@ define <32 x i16> @load_32i1(ptr %a) {
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: load_32i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovw (%rdi), %k0
+; AVX10-256-NEXT:    kmovw 2(%rdi), %k1
+; AVX10-256-NEXT:    vpmovm2w %k0, %ymm0
+; AVX10-256-NEXT:    vpmovm2w %k1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: load_32i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2626,6 +3016,14 @@ define <64 x i8> @load_64i1(ptr %a) {
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: load_64i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd (%rdi), %k0
+; AVX10-256-NEXT:    kmovd 4(%rdi), %k1
+; AVX10-256-NEXT:    vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT:    vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: load_64i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2673,6 +3071,13 @@ define void @store_8i1(ptr %a, <8 x i1> %v) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_8i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_8i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
@@ -2720,6 +3125,13 @@ define void @store_8i1_1(ptr %a, <8 x i16> %v) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_8i1_1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT:    kmovb %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_8i1_1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2766,6 +3178,13 @@ define void @store_16i1(ptr %a, <16 x i1> %v) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_16i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX10-256-NEXT:    vpmovb2m %xmm0, %k0
+; AVX10-256-NEXT:    kmovw %k0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_16i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
@@ -2822,6 +3241,14 @@ define void @store_32i1(ptr %a, <32 x i1> %v) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_32i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, (%rdi)
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_32i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllw $7, %ymm0, %ymm0
@@ -2879,6 +3306,17 @@ define void @store_32i1_1(ptr %a, <32 x i16> %v) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_32i1_1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $15, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT:    vpsllw $15, %ymm1, %ymm0
+; AVX10-256-NEXT:    vpmovw2m %ymm0, %k1
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT:    kmovd %k0, (%rdi)
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_32i1_1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3787,6 +4225,17 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_64i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT:    vpsllw $7, %ymm1, %ymm0
+; AVX10-256-NEXT:    vpmovb2m %ymm0, %k1
+; AVX10-256-NEXT:    kmovd %k1, 4(%rdi)
+; AVX10-256-NEXT:    kmovd %k0, (%rdi)
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_64i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpsllw $7, %zmm0, %zmm0
@@ -3834,6 +4283,14 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: test_bitcast_v8i1_zext:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    kmovb %k0, %eax
+; AVX10-256-NEXT:    addl %eax, %eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: test_bitcast_v8i1_zext:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -3850,13 +4307,23 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
 }
 
 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
-; CHECK-LABEL: test_bitcast_v16i1_zext:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    addl %eax, %eax
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; EVEX512-LABEL: test_bitcast_v16i1_zext:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT:    kmovw %k0, %eax
+; EVEX512-NEXT:    addl %eax, %eax
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: test_bitcast_v16i1_zext:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kmovw %k0, %eax
+; AVX10-256-NEXT:    addl %eax, %eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
 ;
 ; X86-LABEL: test_bitcast_v16i1_zext:
 ; X86:       ## %bb.0:
@@ -4066,6 +4533,27 @@ define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
 ; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ktest_signed:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpor %ymm3, %ymm1, %ymm1
+; AVX10-256-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    testw %ax, %ax
+; AVX10-256-NEXT:    jle LBB66_1
+; AVX10-256-NEXT:  ## %bb.2: ## %bb.2
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB66_1: ## %bb.1
+; AVX10-256-NEXT:    pushq %rax
+; AVX10-256-NEXT:    .cfi_def_cfa_offset 16
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    callq _foo
+; AVX10-256-NEXT:    addq $8, %rsp
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ktest_signed:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vpord %zmm1, %zmm0, %zmm0
@@ -4099,21 +4587,40 @@ declare void @foo()
 
 ; Make sure we can use the ZF/CF flags from kortest to check for all ones.
 define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
-; CHECK-LABEL: ktest_allones:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; CHECK-NEXT:    kortestw %k0, %k0
-; CHECK-NEXT:    je LBB67_2
-; CHECK-NEXT:  ## %bb.1: ## %bb.1
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    callq _foo
-; CHECK-NEXT:    addq $8, %rsp
-; CHECK-NEXT:  LBB67_2: ## %bb.2
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; EVEX512-LABEL: ktest_allones:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; EVEX512-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    je LBB67_2
+; EVEX512-NEXT:  ## %bb.1: ## %bb.1
+; EVEX512-NEXT:    pushq %rax
+; EVEX512-NEXT:    .cfi_def_cfa_offset 16
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    callq _foo
+; EVEX512-NEXT:    addq $8, %rsp
+; EVEX512-NEXT:  LBB67_2: ## %bb.2
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: ktest_allones:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpor %ymm3, %ymm1, %ymm1
+; AVX10-256-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    jb LBB67_2
+; AVX10-256-NEXT:  ## %bb.1: ## %bb.1
+; AVX10-256-NEXT:    pushq %rax
+; AVX10-256-NEXT:    .cfi_def_cfa_offset 16
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    callq _foo
+; AVX10-256-NEXT:    addq $8, %rsp
+; AVX10-256-NEXT:  LBB67_2: ## %bb.2
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
 ;
 ; X86-LABEL: ktest_allones:
 ; X86:       ## %bb.0:
@@ -4182,6 +4689,14 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
 ; AVX512DQ-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: mask_widening:
+; AVX10-256:       ## %bb.0: ## %entry
+; AVX10-256-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
+; AVX10-256-NEXT:    vpblendmd %ymm6, %ymm4, %ymm0 {%k1}
+; AVX10-256-NEXT:    kshiftrw $8, %k1, %k1
+; AVX10-256-NEXT:    vpblendmd %ymm7, %ymm5, %ymm1 {%k1}
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: mask_widening:
 ; X86:       ## %bb.0: ## %entry
 ; X86-NEXT:    pushl %ebp
@@ -4239,6 +4754,12 @@ define void @store_v128i1_constant(ptr %R) {
 ; AVX512DQ-NEXT:    vmovaps %xmm0, (%rdi)
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: store_v128i1_constant:
+; AVX10-256:       ## %bb.0: ## %entry
+; AVX10-256-NEXT:    vmovaps {{.*#+}} xmm0 = [4294963197,3758096251,4294959101,3221225403]
+; AVX10-256-NEXT:    vmovaps %xmm0, (%rdi)
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: store_v128i1_constant:
 ; X86:       ## %bb.0: ## %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4302,13 +4823,24 @@ entry:
 
 ; Make sure we bring the -1 constant into the mask domain.
 define void @mask_not_cast(ptr, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
-; CHECK-LABEL: mask_not_cast:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
-; CHECK-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
-; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; EVEX512-LABEL: mask_not_cast:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
+; EVEX512-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
+; EVEX512-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: mask_not_cast:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpnleud %ymm6, %ymm4, %k1
+; AVX10-256-NEXT:    vpcmpnleud %ymm7, %ymm5, %k2
+; AVX10-256-NEXT:    vptestmd %ymm1, %ymm3, %k2 {%k2}
+; AVX10-256-NEXT:    vmovdqu32 %ymm1, 32(%rdi) {%k2}
+; AVX10-256-NEXT:    vptestmd %ymm0, %ymm2, %k1 {%k1}
+; AVX10-256-NEXT:    vmovdqu32 %ymm0, (%rdi) {%k1}
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
 ;
 ; X86-LABEL: mask_not_cast:
 ; X86:       ## %bb.0:
@@ -4436,6 +4968,27 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
 ; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ktest_3:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    ktestb %k1, %k0
+; AVX10-256-NEXT:    je LBB74_1
+; AVX10-256-NEXT:  ## %bb.2: ## %exit
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB74_1: ## %bar
+; AVX10-256-NEXT:    pushq %rax
+; AVX10-256-NEXT:    .cfi_def_cfa_offset 16
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    callq _foo
+; AVX10-256-NEXT:    addq $8, %rsp
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ktest_3:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vptestnmd %ymm0, %ymm0, %k0
@@ -4564,6 +5117,39 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
 ; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ktest_4:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm4, %ymm4, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm5, %ymm5, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm6, %ymm6, %k2
+; AVX10-256-NEXT:    vptestnmq %ymm7, %ymm7, %k3
+; AVX10-256-NEXT:    kshiftlb $4, %k3, %k3
+; AVX10-256-NEXT:    korb %k3, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    ktestb %k1, %k0
+; AVX10-256-NEXT:    je LBB75_1
+; AVX10-256-NEXT:  ## %bb.2: ## %exit
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB75_1: ## %bar
+; AVX10-256-NEXT:    pushq %rax
+; AVX10-256-NEXT:    .cfi_def_cfa_offset 16
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    callq _foo
+; AVX10-256-NEXT:    addq $8, %rsp
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ktest_4:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -4690,6 +5276,35 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
 ; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ktest_5:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT:    korw %k1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm4, %ymm4, %k1
+; AVX10-256-NEXT:    vptestnmd %ymm5, %ymm5, %k2
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT:    vptestnmd %ymm6, %ymm6, %k2
+; AVX10-256-NEXT:    vptestnmd %ymm7, %ymm7, %k3
+; AVX10-256-NEXT:    kunpckbw %k2, %k3, %k2
+; AVX10-256-NEXT:    korw %k2, %k1, %k1
+; AVX10-256-NEXT:    ktestw %k1, %k0
+; AVX10-256-NEXT:    je LBB76_1
+; AVX10-256-NEXT:  ## %bb.2: ## %exit
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB76_1: ## %bar
+; AVX10-256-NEXT:    pushq %rax
+; AVX10-256-NEXT:    .cfi_def_cfa_offset 16
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    callq _foo
+; AVX10-256-NEXT:    addq $8, %rsp
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ktest_5:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -4850,6 +5465,35 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
 ; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ktest_6:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT:    kord %k1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm4, %ymm4, %k1
+; AVX10-256-NEXT:    vptestnmw %ymm5, %ymm5, %k2
+; AVX10-256-NEXT:    kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT:    vptestnmw %ymm6, %ymm6, %k2
+; AVX10-256-NEXT:    vptestnmw %ymm7, %ymm7, %k3
+; AVX10-256-NEXT:    kunpckwd %k2, %k3, %k2
+; AVX10-256-NEXT:    kord %k2, %k1, %k1
+; AVX10-256-NEXT:    ktestd %k1, %k0
+; AVX10-256-NEXT:    je LBB77_1
+; AVX10-256-NEXT:  ## %bb.2: ## %exit
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB77_1: ## %bar
+; AVX10-256-NEXT:    pushq %rax
+; AVX10-256-NEXT:    .cfi_def_cfa_offset 16
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    callq _foo
+; AVX10-256-NEXT:    addq $8, %rsp
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ktest_6:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -5006,6 +5650,35 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
 ; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ktest_7:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT:    vptestnmb %ymm0, %ymm0, %k1
+; AVX10-256-NEXT:    vptestnmb %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kord %k2, %k0, %k0
+; AVX10-256-NEXT:    vptestnmb %ymm2, %ymm2, %k2
+; AVX10-256-NEXT:    kord %k2, %k1, %k1
+; AVX10-256-NEXT:    vptestnmb %ymm5, %ymm5, %k2
+; AVX10-256-NEXT:    vptestnmb %ymm4, %ymm4, %k3
+; AVX10-256-NEXT:    vptestnmb %ymm7, %ymm7, %k4
+; AVX10-256-NEXT:    kord %k4, %k2, %k2
+; AVX10-256-NEXT:    kandd %k2, %k0, %k0
+; AVX10-256-NEXT:    vptestnmb %ymm6, %ymm6, %k2
+; AVX10-256-NEXT:    kord %k2, %k3, %k2
+; AVX10-256-NEXT:    kandd %k2, %k1, %k1
+; AVX10-256-NEXT:    kortestd %k0, %k1
+; AVX10-256-NEXT:    je LBB78_1
+; AVX10-256-NEXT:  ## %bb.2: ## %exit
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  LBB78_1: ## %bar
+; AVX10-256-NEXT:    pushq %rax
+; AVX10-256-NEXT:    .cfi_def_cfa_offset 16
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    callq _foo
+; AVX10-256-NEXT:    addq $8, %rsp
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ktest_7:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -5104,6 +5777,21 @@ define <64 x i1> @mask64_insert(i32 %a) {
 ; AVX512DQ-NEXT:    movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: mask64_insert:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd %edi, %k0
+; AVX10-256-NEXT:    kshiftld $31, %k0, %k0
+; AVX10-256-NEXT:    kshiftrd $31, %k0, %k0
+; AVX10-256-NEXT:    movl $-131076, %eax ## imm = 0xFFFDFFFC
+; AVX10-256-NEXT:    kmovd %eax, %k1
+; AVX10-256-NEXT:    kshiftrd $1, %k1, %k1
+; AVX10-256-NEXT:    kshiftld $1, %k1, %k1
+; AVX10-256-NEXT:    kord %k0, %k1, %k0
+; AVX10-256-NEXT:    vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT:    ## ymm1 = mem[0,1,0,1]
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: mask64_insert:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
@@ -5240,6 +5928,15 @@ define <1 x i1> @usub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
 ; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: usub_sat_v1i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd %esi, %k0
+; AVX10-256-NEXT:    kmovd %edi, %k1
+; AVX10-256-NEXT:    kandnw %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: usub_sat_v1i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
@@ -5309,6 +6006,15 @@ define <1 x i1> @ssub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
 ; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
 ; AVX512DQ-NEXT:    retq
 ;
+; AVX10-256-LABEL: ssub_sat_v1i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd %esi, %k0
+; AVX10-256-NEXT:    kmovd %edi, %k1
+; AVX10-256-NEXT:    kandnw %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    ## kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    retq
+;
 ; X86-LABEL: ssub_sat_v1i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index e4c62fca5bd57a..a97426f7a20904 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1,36 +1,61 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=EVEX512,AVX512,KNL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=EVEX512,AVX512,AVX512BW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=EVEX512,SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10-256
 
 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
-; CHECK-LABEL: test1:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
-; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test1:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
+; EVEX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpleps %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xcb,0x02]
+; AVX10-256-NEXT:    vcmpleps %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xd2,0x02]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp ole <16 x float> %x, %y
   %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
   ret <16 x float> %max
 }
 
 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
-; CHECK-LABEL: test2:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
-; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test2:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
+; EVEX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test2:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmplepd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0xcb,0x02]
+; AVX10-256-NEXT:    vcmplepd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xd2,0x02]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp ole <8 x double> %x, %y
   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
   ret <8 x double> %max
 }
 
 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test3:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
-; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test3:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
+; EVEX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test3:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x17]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <16 x i32>, ptr %yp, align 4
   %mask = icmp eq <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -38,33 +63,57 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, ptr %yp) nounwind {
 }
 
 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test4_unsigned:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
-; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test4_unsigned:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
+; EVEX512-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test4_unsigned:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpnltud %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1e,0xcb,0x05]
+; AVX10-256-NEXT:    vpcmpnltud %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd2,0x05]
+; AVX10-256-NEXT:    vpblendmd %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc4]
+; AVX10-256-NEXT:    vpblendmd %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xcd]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp uge <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
   ret <16 x i32> %max
 }
 
 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: test5:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
-; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test5:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
+; EVEX512-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test5:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcb]
+; AVX10-256-NEXT:    vpcmpeqq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xd2]
+; AVX10-256-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp eq <8 x i64> %x, %y
   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
   ret <8 x i64> %max
 }
 
 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
-; CHECK-LABEL: test6_unsigned:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
-; CHECK-NEXT:    vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test6_unsigned:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
+; EVEX512-NEXT:    vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test6_unsigned:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpnleuq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1e,0xcb,0x06]
+; AVX10-256-NEXT:    vpcmpnleuq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd2,0x06]
+; AVX10-256-NEXT:    vpblendmq %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc4]
+; AVX10-256-NEXT:    vpblendmq %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xcd]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp ugt <8 x i64> %x, %y
   %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
   ret <8 x i64> %max
@@ -88,6 +137,13 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
 ; SKX-NEXT:    vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
 ; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test7:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
+; AVX10-256-NEXT:    vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
+; AVX10-256-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %mask = fcmp olt <4 x float> %a, zeroinitializer
   %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
@@ -112,6 +168,13 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
 ; SKX-NEXT:    vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
 ; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test8:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x57,0xd2]
+; AVX10-256-NEXT:    vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
+; AVX10-256-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp olt <2 x double> %a, zeroinitializer
   %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
   ret <2 x double>%c
@@ -132,6 +195,12 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; SKX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
 ; SKX-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test9:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp eq <8 x i32> %x, %y
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
   ret <8 x i32> %max
@@ -152,6 +221,12 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
 ; SKX-NEXT:    vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
 ; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test10:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %mask = fcmp oeq <8 x float> %x, %y
   %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
@@ -168,6 +243,11 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test11_unsigned:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp ugt <8 x i32> %x, %y
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
   ret <8 x i32> %max
@@ -203,6 +283,22 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
 ; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqq %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc4]
+; AVX10-256-NEXT:    vpcmpeqq %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcd]
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT:    korb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x45,0xc1]
+; AVX10-256-NEXT:    vpcmpeqq %ymm6, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x29,0xce]
+; AVX10-256-NEXT:    vpcmpeqq %ymm7, %ymm3, %k2 ## encoding: [0x62,0xf2,0xe5,0x28,0x29,0xd7]
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2 ## encoding: [0xc4,0xe3,0x79,0x32,0xd2,0x04]
+; AVX10-256-NEXT:    korb %k2, %k1, %k1 ## encoding: [0xc5,0xf5,0x45,0xca]
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT:    ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %res = icmp eq <16 x i64> %a, %b
   %res1 = bitcast <16 x i1> %res to i16
   ret i16 %res1
@@ -237,6 +333,19 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
 ; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12_v32i32:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqd %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc4]
+; AVX10-256-NEXT:    vpcmpeqd %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0xcd]
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT:    vpcmpeqd %ymm6, %ymm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x28,0x76,0xce]
+; AVX10-256-NEXT:    vpcmpeqd %ymm7, %ymm3, %k2 ## encoding: [0x62,0xf1,0x65,0x28,0x76,0xd7]
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1 ## encoding: [0xc5,0xed,0x4b,0xc9]
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %res = icmp eq <32 x i32> %a, %b
   %res1 = bitcast <32 x i1> %res to i32
   ret i32 %res1
@@ -291,6 +400,21 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
 ; SKX-NEXT:    kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test12_v64i16:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqw %ymm4, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc4]
+; AVX10-256-NEXT:    vpcmpeqw %ymm5, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x75,0xcd]
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT:    kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
+; AVX10-256-NEXT:    vpcmpeqw %ymm6, %ymm2, %k0 ## encoding: [0x62,0xf1,0x6d,0x28,0x75,0xc6]
+; AVX10-256-NEXT:    vpcmpeqw %ymm7, %ymm3, %k1 ## encoding: [0x62,0xf1,0x65,0x28,0x75,0xcf]
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
+; AVX10-256-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT:    shlq $32, %rax ## encoding: [0x48,0xc1,0xe0,0x20]
+; AVX10-256-NEXT:    orq %rcx, %rax ## encoding: [0x48,0x09,0xc8]
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %res = icmp eq <64 x i16> %a, %b
   %res1 = bitcast <64 x i1> %res to i64
   ret i64 %res1
@@ -310,6 +434,17 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
 ; SKX-NEXT:    vpmovm2d %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
 ; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test13:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpeqps %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0xc2,0xc2,0x00]
+; AVX10-256-NEXT:    vbroadcastss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2 ## EVEX TO VEX Compression ymm2 = [1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT:    ## encoding: [0xc4,0xe2,0x7d,0x18,0x15,A,A,A,A]
+; AVX10-256-NEXT:    ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX10-256-NEXT:    vandps %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc2]
+; AVX10-256-NEXT:    vcmpeqps %ymm3, %ymm1, %ymm1 ## encoding: [0xc5,0xf4,0xc2,0xcb,0x00]
+; AVX10-256-NEXT:    vandps %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x54,0xca]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 {
   %cmpvector_i = fcmp oeq <16 x float> %a, %b
   %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
@@ -317,12 +452,22 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
 }
 
 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
-; CHECK-LABEL: test14:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
-; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
-; CHECK-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test14:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
+; EVEX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
+; EVEX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test14:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsubd %ymm2, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xd2]
+; AVX10-256-NEXT:    vpsubd %ymm3, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfa,0xdb]
+; AVX10-256-NEXT:    vpcmpgtd %ymm1, %ymm3, %k1 ## encoding: [0x62,0xf1,0x65,0x28,0x66,0xc9]
+; AVX10-256-NEXT:    vpcmpgtd %ymm0, %ymm2, %k2 ## encoding: [0x62,0xf1,0x6d,0x28,0x66,0xd0]
+; AVX10-256-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k2} {z} ## encoding: [0x62,0xf1,0x7d,0xaa,0x6f,0xc2]
+; AVX10-256-NEXT:    vmovdqa32 %ymm3, %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0xcb]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %sub_r = sub <16 x i32> %a, %b
   %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
   %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
@@ -332,12 +477,22 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
 }
 
 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
-; CHECK-LABEL: test15:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
-; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
-; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test15:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
+; EVEX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
+; EVEX512-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test15:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsubq %ymm2, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfb,0xd2]
+; AVX10-256-NEXT:    vpsubq %ymm3, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfb,0xdb]
+; AVX10-256-NEXT:    vpcmpgtq %ymm1, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x37,0xc9]
+; AVX10-256-NEXT:    vpcmpgtq %ymm0, %ymm2, %k2 ## encoding: [0x62,0xf2,0xed,0x28,0x37,0xd0]
+; AVX10-256-NEXT:    vmovdqa64 %ymm2, %ymm0 {%k2} {z} ## encoding: [0x62,0xf1,0xfd,0xaa,0x6f,0xc2]
+; AVX10-256-NEXT:    vmovdqa64 %ymm3, %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0xcb]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %sub_r = sub <8 x i64> %a, %b
   %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
   %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
@@ -347,22 +502,38 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
 }
 
 define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test16:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
-; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test16:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
+; EVEX512-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test16:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpnltd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xcb,0x05]
+; AVX10-256-NEXT:    vpcmpnltd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd2,0x05]
+; AVX10-256-NEXT:    vpblendmd %ymm4, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc4]
+; AVX10-256-NEXT:    vpblendmd %ymm5, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xcd]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp sge <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
   ret <16 x i32> %max
 }
 
 define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test17:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
-; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test17:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
+; EVEX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test17:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpgtd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0x4f,0x01]
+; AVX10-256-NEXT:    vpcmpgtd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0x17]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <16 x i32>, ptr %y.ptr, align 4
   %mask = icmp sgt <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -370,11 +541,19 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
 }
 
 define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test18:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
-; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test18:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
+; EVEX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test18:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpled 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0x4f,0x01,0x02]
+; AVX10-256-NEXT:    vpcmpled (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0x17,0x02]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <16 x i32>, ptr %y.ptr, align 4
   %mask = icmp sle <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -382,11 +561,19 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
 }
 
 define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
-; CHECK-LABEL: test19:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
-; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test19:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
+; EVEX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test19:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpleud 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1e,0x4f,0x01,0x02]
+; AVX10-256-NEXT:    vpcmpleud (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0x17,0x02]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <16 x i32>, ptr %y.ptr, align 4
   %mask = icmp ule <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -394,12 +581,22 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
 }
 
 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test20:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
-; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
-; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test20:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
+; EVEX512-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
+; EVEX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test20:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0xcb]
+; AVX10-256-NEXT:    vpcmpeqd %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xd2]
+; AVX10-256-NEXT:    vpcmpeqd %ymm6, %ymm4, %k2 {%k2} ## encoding: [0x62,0xf1,0x5d,0x2a,0x76,0xd6]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpcmpeqd %ymm7, %ymm5, %k1 {%k1} ## encoding: [0x62,0xf1,0x55,0x29,0x76,0xcf]
+; AVX10-256-NEXT:    vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp eq <16 x i32> %x1, %y1
   %mask0 = icmp eq <16 x i32> %x, %y
   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@@ -408,12 +605,22 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
 }
 
 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test21:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
-; CHECK-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
-; CHECK-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test21:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
+; EVEX512-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
+; EVEX512-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test21:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpleq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xcb,0x02]
+; AVX10-256-NEXT:    vpcmpleq %ymm2, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd2,0x02]
+; AVX10-256-NEXT:    vpcmpnltq %ymm6, %ymm4, %k2 {%k2} ## encoding: [0x62,0xf3,0xdd,0x2a,0x1f,0xd6,0x05]
+; AVX10-256-NEXT:    vpblendmq %ymm0, %ymm4, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xdd,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpcmpnltq %ymm7, %ymm5, %k1 {%k1} ## encoding: [0x62,0xf3,0xd5,0x29,0x1f,0xcf,0x05]
+; AVX10-256-NEXT:    vpblendmq %ymm1, %ymm5, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xd5,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp sge <8 x i64> %x1, %y1
   %mask0 = icmp sle <8 x i64> %x, %y
   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@@ -422,12 +629,22 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
 }
 
 define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test22:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
-; CHECK-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
-; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test22:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
+; EVEX512-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
+; EVEX512-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test22:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpgtq %ymm5, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x37,0xcd]
+; AVX10-256-NEXT:    vpcmpgtq %ymm4, %ymm2, %k2 ## encoding: [0x62,0xf2,0xed,0x28,0x37,0xd4]
+; AVX10-256-NEXT:    vpcmpgtq (%rdi), %ymm0, %k2 {%k2} ## encoding: [0x62,0xf2,0xfd,0x2a,0x37,0x17]
+; AVX10-256-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpcmpgtq 32(%rdi), %ymm1, %k1 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x37,0x4f,0x01]
+; AVX10-256-NEXT:    vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp sgt <8 x i64> %x1, %y1
   %y = load <8 x i64>, ptr %y.ptr, align 4
   %mask0 = icmp sgt <8 x i64> %x, %y
@@ -437,12 +654,22 @@ define <8 x i64> @test22(<8 x i64> %x, ptr %y.ptr, <8 x i64> %x1, <8 x i64> %y1)
 }
 
 define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test23:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
-; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test23:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
+; EVEX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test23:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpnltd %ymm5, %ymm3, %k1 ## encoding: [0x62,0xf3,0x65,0x28,0x1f,0xcd,0x05]
+; AVX10-256-NEXT:    vpcmpnltd %ymm4, %ymm2, %k2 ## encoding: [0x62,0xf3,0x6d,0x28,0x1f,0xd4,0x05]
+; AVX10-256-NEXT:    vpcmpleud (%rdi), %ymm0, %k2 {%k2} ## encoding: [0x62,0xf3,0x7d,0x2a,0x1e,0x17,0x02]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpcmpleud 32(%rdi), %ymm1, %k1 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x1e,0x4f,0x01,0x02]
+; AVX10-256-NEXT:    vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp sge <16 x i32> %x1, %y1
   %y = load <16 x i32>, ptr %y.ptr, align 4
   %mask0 = icmp ule <16 x i32> %x, %y
@@ -452,11 +679,20 @@ define <16 x i32> @test23(<16 x i32> %x, ptr %y.ptr, <16 x i32> %x1, <16 x i32>
 }
 
 define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
-; CHECK-LABEL: test24:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
-; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test24:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
+; EVEX512-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test24:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x27]
+; AVX10-256-NEXT:    vpcmpeqq %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x29,0xcc]
+; AVX10-256-NEXT:    vpcmpeqq %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xd4]
+; AVX10-256-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %yb = load i64, ptr %yb.ptr, align 4
   %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
   %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -466,11 +702,20 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, ptr %yb.ptr) nounwind {
 }
 
 define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test25:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
-; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test25:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
+; EVEX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test25:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpbroadcastd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x27]
+; AVX10-256-NEXT:    vpcmpled %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xcc,0x02]
+; AVX10-256-NEXT:    vpcmpled %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd4,0x02]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %yb = load i32, ptr %yb.ptr, align 4
   %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
   %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -480,12 +725,23 @@ define <16 x i32> @test25(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1) nounwind {
 }
 
 define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test26:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
-; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test26:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
+; EVEX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test26:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpbroadcastd (%rdi), %ymm6 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x37]
+; AVX10-256-NEXT:    vpcmpgtd %ymm6, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0xce]
+; AVX10-256-NEXT:    vpcmpgtd %ymm6, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xd6]
+; AVX10-256-NEXT:    vpcmpnltd %ymm4, %ymm2, %k2 {%k2} ## encoding: [0x62,0xf3,0x6d,0x2a,0x1f,0xd4,0x05]
+; AVX10-256-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpcmpnltd %ymm5, %ymm3, %k1 {%k1} ## encoding: [0x62,0xf3,0x65,0x29,0x1f,0xcd,0x05]
+; AVX10-256-NEXT:    vpblendmd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp sge <16 x i32> %x1, %y1
   %yb = load i32, ptr %yb.ptr, align 4
   %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
@@ -497,12 +753,23 @@ define <16 x i32> @test26(<16 x i32> %x, ptr %yb.ptr, <16 x i32> %x1, <16 x i32>
 }
 
 define <8 x i64> @test27(<8 x i64> %x, ptr %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test27:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
-; CHECK-NEXT:    vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
-; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test27:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
+; EVEX512-NEXT:    vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
+; EVEX512-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test27:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq (%rdi), %ymm6 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x37]
+; AVX10-256-NEXT:    vpcmpleq %ymm6, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xce,0x02]
+; AVX10-256-NEXT:    vpcmpleq %ymm6, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd6,0x02]
+; AVX10-256-NEXT:    vpcmpnltq %ymm4, %ymm2, %k2 {%k2} ## encoding: [0x62,0xf3,0xed,0x2a,0x1f,0xd4,0x05]
+; AVX10-256-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x64,0xc0]
+; AVX10-256-NEXT:    vpcmpnltq %ymm5, %ymm3, %k1 {%k1} ## encoding: [0x62,0xf3,0xe5,0x29,0x1f,0xcd,0x05]
+; AVX10-256-NEXT:    vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp sge <8 x i64> %x1, %y1
   %yb = load i64, ptr %yb.ptr, align 4
   %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
@@ -530,6 +797,20 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1
 ; SKX-NEXT:    kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
 ; SKX-NEXT:    vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test28:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpgtq %ymm2, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc2]
+; AVX10-256-NEXT:    vpcmpgtq %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x37,0xcb]
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT:    korb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x45,0xc1]
+; AVX10-256-NEXT:    vpcmpgtq %ymm6, %ymm4, %k1 ## encoding: [0x62,0xf2,0xdd,0x28,0x37,0xce]
+; AVX10-256-NEXT:    vpcmpgtq %ymm7, %ymm5, %k2 ## encoding: [0x62,0xf2,0xd5,0x28,0x37,0xd7]
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2 ## encoding: [0xc4,0xe3,0x79,0x32,0xd2,0x04]
+; AVX10-256-NEXT:    korb %k2, %k1, %k1 ## encoding: [0xc5,0xf5,0x45,0xca]
+; AVX10-256-NEXT:    kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
+; AVX10-256-NEXT:    vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %x_gt_y = icmp sgt <8 x i64> %x, %y
   %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
   %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
@@ -566,6 +847,19 @@ define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32>
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test29:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpgtd %ymm2, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc2]
+; AVX10-256-NEXT:    vpcmpgtd %ymm3, %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x66,0xcb]
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT:    vpcmpgtd %ymm6, %ymm4, %k1 ## encoding: [0x62,0xf1,0x5d,0x28,0x66,0xce]
+; AVX10-256-NEXT:    vpcmpgtd %ymm7, %ymm5, %k2 ## encoding: [0x62,0xf1,0x55,0x28,0x66,0xd7]
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1 ## encoding: [0xc5,0xed,0x4b,0xc9]
+; AVX10-256-NEXT:    kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
+; AVX10-256-NEXT:    vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %x_gt_y = icmp sgt <16 x i32> %x, %y
   %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
   %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
@@ -588,6 +882,12 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
 ; SKX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
 ; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test30:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %mask = fcmp oeq <4 x double> %x, %y
   %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
@@ -611,6 +911,12 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, ptr %yp) nounwind
 ; SKX-NEXT:    vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test31:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
+; AVX10-256-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %y = load <2 x double>, ptr %yp, align 4
   %mask = fcmp olt <2 x double> %x, %y
@@ -635,6 +941,12 @@ define <2 x double> @test31_commute(<2 x double> %x, <2 x double> %x1, ptr %yp)
 ; SKX-NEXT:    vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test31_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %y = load <2 x double>, ptr %yp, align 4
   %mask = fcmp olt <2 x double> %y, %x
@@ -658,6 +970,12 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, ptr %yp) nounwind
 ; SKX-NEXT:    vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test32:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %y = load <4 x double>, ptr %yp, align 4
   %mask = fcmp ogt <4 x double> %y, %x
@@ -681,6 +999,12 @@ define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, ptr %yp)
 ; SKX-NEXT:    vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test32_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %y = load <4 x double>, ptr %yp, align 4
   %mask = fcmp ogt <4 x double> %x, %y
@@ -689,11 +1013,19 @@ define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, ptr %yp)
 }
 
 define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test33:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
-; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test33:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
+; EVEX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test33:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltpd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0x4f,0x01,0x01]
+; AVX10-256-NEXT:    vcmpltpd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x17,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <8 x double>, ptr %yp, align 4
   %mask = fcmp olt <8 x double> %x, %y
   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
@@ -701,11 +1033,19 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind
 }
 
 define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test33_commute:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
-; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test33_commute:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
+; EVEX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test33_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtpd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0x4f,0x01,0x0e]
+; AVX10-256-NEXT:    vcmpgtpd (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x17,0x0e]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <8 x double>, ptr %yp, align 4
   %mask = fcmp olt <8 x double> %y, %x
   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
@@ -729,6 +1069,12 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, ptr %yp) nounwind {
 ; SKX-NEXT:    vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test34:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
+; AVX10-256-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <4 x float>, ptr %yp, align 4
   %mask = fcmp olt <4 x float> %x, %y
   %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
@@ -752,6 +1098,12 @@ define <4 x float> @test34_commute(<4 x float> %x, <4 x float> %x1, ptr %yp) nou
 ; SKX-NEXT:    vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test34_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <4 x float>, ptr %yp, align 4
   %mask = fcmp olt <4 x float> %y, %x
   %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
@@ -774,6 +1126,12 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, ptr %yp) nounwind {
 ; SKX-NEXT:    vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test35:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %y = load <8 x float>, ptr %yp, align 4
   %mask = fcmp ogt <8 x float> %y, %x
@@ -797,6 +1155,12 @@ define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, ptr %yp) nou
 ; SKX-NEXT:    vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test35_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %y = load <8 x float>, ptr %yp, align 4
   %mask = fcmp ogt <8 x float> %x, %y
@@ -805,11 +1169,19 @@ define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, ptr %yp) nou
 }
 
 define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test36:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
-; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test36:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
+; EVEX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test36:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltps 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0x4f,0x01,0x01]
+; AVX10-256-NEXT:    vcmpltps (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x17,0x01]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <16 x float>, ptr %yp, align 4
   %mask = fcmp olt <16 x float> %x, %y
   %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
@@ -817,11 +1189,19 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind
 }
 
 define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp) nounwind {
-; CHECK-LABEL: test36_commute:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
-; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test36_commute:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
+; EVEX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test36_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtps 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0x4f,0x01,0x0e]
+; AVX10-256-NEXT:    vcmpgtps (%rdi), %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x17,0x0e]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %y = load <16 x float>, ptr %yp, align 4
   %mask = fcmp olt <16 x float> %y, %x
   %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
@@ -829,11 +1209,20 @@ define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, ptr %yp)
 }
 
 define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test37:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
-; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test37:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
+; EVEX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test37:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT:    vcmpltpd %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x28,0xc2,0xcc,0x01]
+; AVX10-256-NEXT:    vcmpltpd %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xd4,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load double, ptr %ptr
   %v = insertelement <8 x double> undef, double %a, i32 0
@@ -845,11 +1234,20 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwin
 }
 
 define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test37_commute:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
-; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test37_commute:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
+; EVEX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test37_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT:    vcmpltpd %ymm1, %ymm4, %k1 ## encoding: [0x62,0xf1,0xdd,0x28,0xc2,0xc9,0x01]
+; AVX10-256-NEXT:    vcmpltpd %ymm0, %ymm4, %k2 ## encoding: [0x62,0xf1,0xdd,0x28,0xc2,0xd0,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load double, ptr %ptr
   %v = insertelement <8 x double> undef, double %a, i32 0
@@ -875,6 +1273,12 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, ptr %ptr) nounwin
 ; SKX-NEXT:    vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test38:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load double, ptr %ptr
   %v = insertelement <4 x double> undef, double %a, i32 0
@@ -900,6 +1304,12 @@ define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, ptr %ptr)
 ; SKX-NEXT:    vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test38_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load double, ptr %ptr
   %v = insertelement <4 x double> undef, double %a, i32 0
@@ -926,6 +1336,12 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, ptr %ptr) nounwin
 ; SKX-NEXT:    vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test39:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
+; AVX10-256-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load double, ptr %ptr
   %v = insertelement <2 x double> undef, double %a, i32 0
@@ -952,6 +1368,12 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, ptr %ptr)
 ; SKX-NEXT:    vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test39_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load double, ptr %ptr
   %v = insertelement <2 x double> undef, double %a, i32 0
@@ -964,11 +1386,20 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, ptr %ptr)
 
 
 define <16  x float> @test40(<16  x float> %x, <16  x float> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test40:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
-; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test40:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
+; EVEX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test40:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vbroadcastss (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x27]
+; AVX10-256-NEXT:    vcmpltps %ymm4, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xcc,0x01]
+; AVX10-256-NEXT:    vcmpltps %ymm4, %ymm0, %k2 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xd4,0x01]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load float, ptr %ptr
   %v = insertelement <16  x float> undef, float %a, i32 0
@@ -980,11 +1411,20 @@ define <16  x float> @test40(<16  x float> %x, <16  x float> %x1, ptr %ptr) noun
 }
 
 define <16  x float> @test40_commute(<16  x float> %x, <16  x float> %x1, ptr %ptr) nounwind {
-; CHECK-LABEL: test40_commute:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
-; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
+; EVEX512-LABEL: test40_commute:
+; EVEX512:       ## %bb.0:
+; EVEX512-NEXT:    vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
+; EVEX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
+; EVEX512-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test40_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vbroadcastss (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x27]
+; AVX10-256-NEXT:    vcmpltps %ymm1, %ymm4, %k1 ## encoding: [0x62,0xf1,0x5c,0x28,0xc2,0xc9,0x01]
+; AVX10-256-NEXT:    vcmpltps %ymm0, %ymm4, %k2 ## encoding: [0x62,0xf1,0x5c,0x28,0xc2,0xd0,0x01]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0x6d,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    vblendmps %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x65,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load float, ptr %ptr
   %v = insertelement <16  x float> undef, float %a, i32 0
@@ -1010,6 +1450,12 @@ define <8  x float> @test41(<8  x float> %x, <8  x float> %x1, ptr %ptr) nounwin
 ; SKX-NEXT:    vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test41:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load float, ptr %ptr
   %v = insertelement <8  x float> undef, float %a, i32 0
@@ -1035,6 +1481,12 @@ define <8  x float> @test41_commute(<8  x float> %x, <8  x float> %x1, ptr %ptr)
 ; SKX-NEXT:    vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test41_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load float, ptr %ptr
   %v = insertelement <8  x float> undef, float %a, i32 0
@@ -1061,6 +1513,12 @@ define <4  x float> @test42(<4  x float> %x, <4  x float> %x1, ptr %ptr) nounwin
 ; SKX-NEXT:    vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test42:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
+; AVX10-256-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load float, ptr %ptr
   %v = insertelement <4  x float> undef, float %a, i32 0
@@ -1087,6 +1545,12 @@ define <4  x float> @test42_commute(<4  x float> %x, <4  x float> %x1, ptr %ptr)
 ; SKX-NEXT:    vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test42_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
+; AVX10-256-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load float, ptr %ptr
   %v = insertelement <4  x float> undef, float %a, i32 0
@@ -1122,6 +1586,18 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, ptr %ptr,<8 x i1>
 ; SKX-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
 ; SKX-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test43:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $15, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0x71,0xf4,0x0f]
+; AVX10-256-NEXT:    vpmovw2m %xmm4, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xcc]
+; AVX10-256-NEXT:    vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT:    vcmpltpd %ymm4, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc2,0xd4,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT:    vcmpltpd %ymm4, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0xc2,0xcc,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load double, ptr %ptr
   %v = insertelement <8 x double> undef, double %a, i32 0
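Note how the incoming <8 x i1> mask operand is handled here: it arrives promoted to <8 x i16> in %xmm4, is turned into an opmask with vpsllw $15 + vpmovw2m, and the bits for the upper 256-bit half are peeled off with kshiftrb $4. A reduced sketch, hypothetical name and not a test from this patch, that should exhibit the same sequence (exact registers may differ):

define <8 x double> @masked_select(<8 x double> %x, <8 x double> %x1, <8 x i1> %m) nounwind {
  ; expected: vpsllw $15 / vpmovw2m on the mask argument, two vblendmpd,
  ; and kshiftrb $4 to extract the mask bits for the upper half
  %r = select <8 x i1> %m, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %r
}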
@@ -1158,6 +1634,18 @@ define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, ptr %ptr,
 ; SKX-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
 ; SKX-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test43_commute:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $15, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0x71,0xf4,0x0f]
+; AVX10-256-NEXT:    vpmovw2m %xmm4, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xcc]
+; AVX10-256-NEXT:    vbroadcastsd (%rdi), %ymm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x27]
+; AVX10-256-NEXT:    vcmpltpd %ymm0, %ymm4, %k2 {%k1} ## encoding: [0x62,0xf1,0xdd,0x29,0xc2,0xd0,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k2} ## encoding: [0x62,0xf2,0xed,0x2a,0x65,0xc0]
+; AVX10-256-NEXT:    kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT:    vcmpltpd %ymm1, %ymm4, %k1 {%k1} ## encoding: [0x62,0xf1,0xdd,0x29,0xc2,0xc9,0x01]
+; AVX10-256-NEXT:    vblendmpd %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x65,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 
   %a = load double, ptr %ptr
   %v = insertelement <8 x double> undef, double %a, i32 0
@@ -1181,6 +1669,12 @@ define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
 ; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
 ; SKX-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test44:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
+; AVX10-256-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp eq <4 x i16> %x, %y
   %1 = sext <4 x i1> %mask to <4 x i32>
   ret <4 x i32> %1
@@ -1202,6 +1696,13 @@ define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
 ; SKX-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
 ; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test45:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
+; AVX10-256-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX10-256-NEXT:    vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp eq <2 x i16> %x, %y
   %1 = zext <2 x i1> %mask to <2 x i64>
   ret <2 x i64> %1
@@ -1223,6 +1724,13 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
 ; SKX-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
 ; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test46:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
+; AVX10-256-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX10-256-NEXT:    vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp oeq <2 x float> %x, %y
   %1 = zext <2 x i1> %mask to <2 x i64>
   ret <2 x i64> %1
@@ -1254,6 +1762,15 @@ define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
 ; SKX-NEXT:    vpblendmb %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x66,0xc1]
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test47:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc0]
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x76,0x28,0x27,0xc9]
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k1 ## encoding: [0xc5,0xf5,0x4b,0xc8]
+; AVX10-256-NEXT:    vpblendmb %xmm2, %xmm3, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x65,0x09,0x66,0xc2]
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %cmp = icmp eq <16 x i32> %a, zeroinitializer
   %res = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %c
   ret <16 x i8> %res
@@ -1282,6 +1799,14 @@ define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) {
 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
 ; SKX-NEXT:    vpblendmw %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x66,0xc1]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test48:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc0]
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x76,0x28,0x27,0xc9]
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k1 ## encoding: [0xc5,0xf5,0x4b,0xc8]
+; AVX10-256-NEXT:    vpblendmw %ymm2, %ymm3, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x66,0xc2]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %cmp = icmp eq <16 x i32> %a, zeroinitializer
   %res = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %c
   ret <16 x i16> %res
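The v16i1 cases all follow one recipe: each 256-bit vptestnmd yields an 8-bit mask, and kunpckbw concatenates the two halves into the 16-bit mask consumed by the blend (or, in pcmpeq_mem_1/pcmpeq_mem_2 below, moved straight to a GPR). A reduced test, hypothetical and not from this patch, that exercises just the concatenation:

define i16 @concat_v16i1(<16 x i32> %a) nounwind {
  ; expected: one vptestnmd per 256-bit half, kunpckbw, kmovd
  %cmp = icmp eq <16 x i32> %a, zeroinitializer
  %cast = bitcast <16 x i1> %cmp to i16
  ret i16 %cast
}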
@@ -1313,6 +1838,16 @@ define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) {
 ; SKX-NEXT:    vpblendmw %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x66,0xc1]
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: test49:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x04]
+; AVX10-256-NEXT:    korb %k1, %k0, %k1 ## encoding: [0xc5,0xfd,0x45,0xc9]
+; AVX10-256-NEXT:    vpblendmw %xmm2, %xmm3, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xe5,0x09,0x66,0xc2]
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %cmp = icmp eq <8 x i64> %a, zeroinitializer
   %res = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %c
   ret <8 x i16> %res
@@ -1342,6 +1877,16 @@ define i16 @pcmpeq_mem_1(<16 x i32> %a, ptr %b) {
 ; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: pcmpeq_mem_1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x07]
+; AVX10-256-NEXT:    vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT:    ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %load = load <16 x i32>, ptr %b
   %cmp = icmp eq <16 x i32> %a, %load
   %cast = bitcast <16 x i1> %cmp to i16
@@ -1374,6 +1919,16 @@ define i16 @pcmpeq_mem_2(<16 x i32> %a, ptr %b) {
 ; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: pcmpeq_mem_2:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0x07]
+; AVX10-256-NEXT:    vpcmpeqd 32(%rdi), %ymm1, %k1 ## encoding: [0x62,0xf1,0x75,0x28,0x76,0x4f,0x01]
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
+; AVX10-256-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX10-256-NEXT:    ## kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %load = load <16 x i32>, ptr %b
   %cmp = icmp eq <16 x i32> %load, %a
   %cast = bitcast <16 x i1> %cmp to i16
@@ -1394,6 +1949,11 @@ define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: PR41066:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %t1 = fcmp ogt <2 x double> %x, %y
   %t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
   ret <2 x i64> %t2
@@ -1421,6 +1981,16 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32>
 ; SKX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
 ; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: zext_bool_logic:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
+; AVX10-256-NEXT:    korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
+; AVX10-256-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX10-256-NEXT:    vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
+; AVX10-256-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %a = icmp eq <4 x i64> %cond1, zeroinitializer
   %b = icmp eq <4 x i64> %cond2, zeroinitializer
   %c = or <4 x i1> %a, %b
@@ -1531,6 +2101,17 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; SKX-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
 ; SKX-NEXT:    vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: half_vec_compare:
+; AVX10-256:       ## %bb.0: ## %entry
+; AVX10-256-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
+; AVX10-256-NEXT:    ## xmm0 = mem[0],zero,zero,zero
+; AVX10-256-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
+; AVX10-256-NEXT:    vcmpneqph %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7c,0x08,0xc2,0xc9,0x04]
+; AVX10-256-NEXT:    vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x05,A,A,A,A]
+; AVX10-256-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX10-256-NEXT:    vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 entry:
   %0 = load <2 x half>, ptr %x
   %1 = fcmp une <2 x half> %0, zeroinitializer
@@ -1571,6 +2152,16 @@ define <8 x i64> @cmp_swap_bug(ptr %x, <8 x i64> %y, <8 x i64> %z) {
 ; SKX-NEXT:    vpmovb2m %xmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xca]
 ; SKX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: cmp_swap_bug:
+; AVX10-256:       ## %bb.0: ## %entry
+; AVX10-256-NEXT:    vmovdqa (%rdi), %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x27]
+; AVX10-256-NEXT:    vpmovwb %xmm4, %xmm4 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xe4]
+; AVX10-256-NEXT:    vpmovb2m %xmm4, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xcc]
+; AVX10-256-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x64,0xc0]
+; AVX10-256-NEXT:    kshiftrb $4, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x04]
+; AVX10-256-NEXT:    vpblendmq %ymm1, %ymm3, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xe5,0x29,0x64,0xc9]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
 entry:
   %0 = load <16 x i8>, ptr %x
   %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1594,6 +2185,12 @@ define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind
 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
 ; SKX-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
 ; SKX-NEXT:    retq ## encoding: [0xc3]
+;
+; AVX10-256-LABEL: narrow_cmp_select_reverse:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
+; AVX10-256-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
+; AVX10-256-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp eq <2 x i64> %x, zeroinitializer
   %res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y
   ret <2 x i32> %res
diff --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
index 2a262644836135..3c91c2948fc909 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=CHECK,AVX10-256
 
 define i32 @mask32(i32 %x) {
 ; CHECK-LABEL: mask32:
@@ -54,12 +55,22 @@ define void @mask32_mem(ptr %ptr) {
 }
 
 define void @mask64_mem(ptr %ptr) {
-; CHECK-LABEL: mask64_mem:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovq (%rdi), %k0
-; CHECK-NEXT:    knotq %k0, %k0
-; CHECK-NEXT:    kmovq %k0, (%rdi)
-; CHECK-NEXT:    retq
+; SKX-LABEL: mask64_mem:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    kmovq (%rdi), %k0
+; SKX-NEXT:    knotq %k0, %k0
+; SKX-NEXT:    kmovq %k0, (%rdi)
+; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: mask64_mem:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd (%rdi), %k0
+; AVX10-256-NEXT:    kmovd 4(%rdi), %k1
+; AVX10-256-NEXT:    knotd %k1, %k1
+; AVX10-256-NEXT:    knotd %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, (%rdi)
+; AVX10-256-NEXT:    kmovd %k1, 4(%rdi)
+; AVX10-256-NEXT:    retq
   %x = load i64, ptr %ptr, align 4
   %m0 = bitcast i64 %x to <64 x i1>
   %m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -123,13 +134,27 @@ define i64 @mand64(i64 %x, i64 %y) {
 }
 
 define i64 @mand64_mem(ptr %x, ptr %y) {
-; CHECK-LABEL: mand64_mem:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovq (%rdi), %k0
-; CHECK-NEXT:    kmovq (%rsi), %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kmovq %k0, %rax
-; CHECK-NEXT:    retq
+; SKX-LABEL: mand64_mem:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    kmovq (%rdi), %k0
+; SKX-NEXT:    kmovq (%rsi), %k1
+; SKX-NEXT:    korq %k1, %k0, %k0
+; SKX-NEXT:    kmovq %k0, %rax
+; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: mand64_mem:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    kmovd (%rdi), %k0
+; AVX10-256-NEXT:    kmovd 4(%rdi), %k1
+; AVX10-256-NEXT:    kmovd (%rsi), %k2
+; AVX10-256-NEXT:    kord %k2, %k0, %k0
+; AVX10-256-NEXT:    kmovd 4(%rsi), %k2
+; AVX10-256-NEXT:    kord %k2, %k1, %k1
+; AVX10-256-NEXT:    kmovd %k0, %ecx
+; AVX10-256-NEXT:    kmovd %k1, %eax
+; AVX10-256-NEXT:    shlq $32, %rax
+; AVX10-256-NEXT:    orq %rcx, %rax
+; AVX10-256-NEXT:    retq
   %ma = load <64 x i1>, ptr %x
   %mb = load <64 x i1>, ptr %y
   %mc = and <64 x i1> %ma, %mb
@@ -229,12 +254,22 @@ define <32 x i1> @bitcast_f32_to_v32i1(float %x) {
 }
 
 define <64 x i1> @bitcast_f64_to_v64i1(double %x) {
-; CHECK-LABEL: bitcast_f64_to_v64i1:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vmovq %xmm0, %rax
-; CHECK-NEXT:    kmovq %rax, %k0
-; CHECK-NEXT:    vpmovm2b %k0, %zmm0
-; CHECK-NEXT:    retq
+; SKX-LABEL: bitcast_f64_to_v64i1:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    vmovq %xmm0, %rax
+; SKX-NEXT:    kmovq %rax, %k0
+; SKX-NEXT:    vpmovm2b %k0, %zmm0
+; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: bitcast_f64_to_v64i1:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vmovq %xmm0, %rax
+; AVX10-256-NEXT:    kmovd %eax, %k0
+; AVX10-256-NEXT:    shrq $32, %rax
+; AVX10-256-NEXT:    kmovd %eax, %k1
+; AVX10-256-NEXT:    vpmovm2b %k0, %ymm0
+; AVX10-256-NEXT:    vpmovm2b %k1, %ymm1
+; AVX10-256-NEXT:    retq
   %a = bitcast double %x to <64 x i1>
   ret <64 x i1> %a
 }
@@ -252,14 +287,28 @@ define float @bitcast_v32i1_to_f32(<32 x i1> %x) {
 }
 
 define double @bitcast_v64i1_to_f64(<64 x i1> %x) {
-; CHECK-LABEL: bitcast_v64i1_to_f64:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpsllw $7, %zmm0, %zmm0
-; CHECK-NEXT:    vpmovb2m %zmm0, %k0
-; CHECK-NEXT:    kmovq %k0, %rax
-; CHECK-NEXT:    vmovq %rax, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; SKX-LABEL: bitcast_v64i1_to_f64:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
+; SKX-NEXT:    vpmovb2m %zmm0, %k0
+; SKX-NEXT:    kmovq %k0, %rax
+; SKX-NEXT:    vmovq %rax, %xmm0
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: bitcast_v64i1_to_f64:
+; AVX10-256:       ## %bb.0:
+; AVX10-256-NEXT:    vpsllw $7, %ymm1, %ymm1
+; AVX10-256-NEXT:    vpmovb2m %ymm1, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    shlq $32, %rax
+; AVX10-256-NEXT:    vpsllw $7, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %ecx
+; AVX10-256-NEXT:    orq %rax, %rcx
+; AVX10-256-NEXT:    vmovq %rcx, %xmm0
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = bitcast <64 x i1> %x to double
   ret double %a
 }
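With KMOVQ/KNOTQ/KORQ unavailable, every i64 mask in this file is now legalized as two i32 halves: independent kmovd/knotd/kord per half, plus shlq $32 + orq (or a pair of 4-byte loads and stores) to split or reassemble the scalar. A reduced sketch that forces the split, hypothetical and modeled on mand64_mem above:

define i64 @mask64_and(ptr %x, ptr %y) {
  ; expected, by analogy with mand64_mem: per-half kmovd loads, kandd on
  ; each half, then shlq $32 + orq to rejoin (exact schedule may differ)
  %ma = load <64 x i1>, ptr %x
  %mb = load <64 x i1>, ptr %y
  %mc = and <64 x i1> %ma, %mb
  %r = bitcast <64 x i1> %mc to i64
  ret i64 %r
}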
diff --git a/llvm/test/CodeGen/X86/kshift.ll b/llvm/test/CodeGen/X86/kshift.ll
index 0acf82f5a144a2..16444adb1dc568 100644
--- a/llvm/test/CodeGen/X86/kshift.ll
+++ b/llvm/test/CodeGen/X86/kshift.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=KNL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq,avx512bw | FileCheck %s --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx10.1-256 | FileCheck %s --check-prefix=AVX10-256
 
 define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_1:
@@ -22,6 +23,23 @@ define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    kandb %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <8 x i64> %x, zeroinitializer
   %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
   %c = icmp eq <8 x i64> %y, zeroinitializer
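All v8i1 shifts in this file share the same prologue: vptestnmq yields a <4 x i1> mask per 256-bit half, and kshiftlb $4 + korb glue the halves into a single 8-bit mask before the lane shift proper (kshiftlb $1 above). The gluing alone is triggered by this reduced test (hypothetical, not from the patch):

define i8 @glue_v8i1(<8 x i64> %x) nounwind {
  ; expected: two vptestnmq, kshiftlb $4, korb, kmovd
  %a = icmp eq <8 x i64> %x, zeroinitializer
  %b = bitcast <8 x i1> %a to i8
  ret i8 %b
}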
@@ -50,6 +68,21 @@ define i16 @kshiftl_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v16i1_1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kshiftlw $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT:    kandw %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <16 x i32> %x, zeroinitializer
   %b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
   %c = icmp eq <16 x i32> %y, zeroinitializer
@@ -96,6 +129,20 @@ define i32 @kshiftl_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v32i1_1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT:    kshiftld $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT:    kandd %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <32 x i16> %x, zeroinitializer
   %b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
   %c = icmp eq <32 x i16> %y, zeroinitializer
@@ -166,6 +213,25 @@ define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
 ; SKX-NEXT:    kmovq %k0, %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v64i1_1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT:    vptestnmb %ymm0, %ymm0, %k1
+; AVX10-256-NEXT:    vpmovm2b %k1, %ymm0
+; AVX10-256-NEXT:    vpmovm2b %k0, %ymm1
+; AVX10-256-NEXT:    vmovdqa {{.*#+}} ymm4 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX10-256-NEXT:    vpermi2b %ymm0, %ymm1, %ymm4
+; AVX10-256-NEXT:    vpmovb2m %ymm4, %k2
+; AVX10-256-NEXT:    kshiftld $1, %k1, %k1
+; AVX10-256-NEXT:    vptestnmb %ymm3, %ymm3, %k0 {%k2}
+; AVX10-256-NEXT:    kmovd %k0, %ecx
+; AVX10-256-NEXT:    shlq $32, %rcx
+; AVX10-256-NEXT:    vptestnmb %ymm2, %ymm2, %k0 {%k1}
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    orq %rcx, %rax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <64 x i8> %x, zeroinitializer
   %b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 64, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
   %c = icmp eq <64 x i8> %y, zeroinitializer
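The v64i1 case cannot reuse that prologue: with no 64-bit kshift available, the lane shift has to carry a bit across the two 32-bit mask halves. Conceptually, for m split into lo/hi i32 halves:

  m << 1  ==>  lo' = lo << 1                   (kshiftld $1)
               hi' = (hi << 1) | (lo >> 31)    (cross-half carry)

That carry of bit 31 into the high half is what forces the vector-domain detour above: vpmovm2b expands each half-mask to bytes, vpermi2b performs the byte-granular funnel shuffle across the 32-lane boundary, and vpmovb2m converts back.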
@@ -194,6 +260,20 @@ define i8 @kshiftl_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_7:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    kshiftlb $7, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    kandb %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <8 x i64> %x, zeroinitializer
   %b = shufflevector <8 x i1> zeroinitializer, <8 x i1> %a, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   %c = icmp eq <8 x i64> %y, zeroinitializer
@@ -222,6 +302,19 @@ define i16 @kshiftl_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v16i1_15:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    kshiftlw $15, %k0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT:    kandw %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <16 x i32> %x, zeroinitializer
   %b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
   %c = icmp eq <16 x i32> %y, zeroinitializer
@@ -255,6 +348,18 @@ define i32 @kshiftl_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v32i1_31:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    kshiftld $31, %k0, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT:    kandd %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <32 x i16> %x, zeroinitializer
   %b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
   %c = icmp eq <32 x i16> %y, zeroinitializer
@@ -291,6 +396,16 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
 ; SKX-NEXT:    kmovq %k0, %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v64i1_63:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmb %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    kshiftld $31, %k0, %k1
+; AVX10-256-NEXT:    vptestnmb %ymm3, %ymm3, %k0 {%k1}
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    shlq $32, %rax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <64 x i8> %x, zeroinitializer
   %b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
   %c = icmp eq <64 x i8> %y, zeroinitializer
@@ -320,6 +435,23 @@ define i8 @kshiftr_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    kandb %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <8 x i64> %x, zeroinitializer
   %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   %c = icmp eq <8 x i64> %y, zeroinitializer
@@ -348,6 +480,21 @@ define i16 @kshiftr_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v16i1_1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kshiftrw $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT:    kandw %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <16 x i32> %x, zeroinitializer
   %b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
   %c = icmp eq <16 x i32> %y, zeroinitializer
@@ -394,6 +541,20 @@ define i32 @kshiftr_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v32i1_1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmw %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT:    kshiftrd $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT:    kandd %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <32 x i16> %x, zeroinitializer
   %b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
   %c = icmp eq <32 x i16> %y, zeroinitializer
@@ -464,6 +625,25 @@ define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
 ; SKX-NEXT:    kmovq %k0, %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v64i1_1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmb %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmb %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    vpmovm2b %k1, %ymm0
+; AVX10-256-NEXT:    vpmovm2b %k0, %ymm1
+; AVX10-256-NEXT:    vmovdqa {{.*#+}} ymm4 = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32]
+; AVX10-256-NEXT:    vpermi2b %ymm0, %ymm1, %ymm4
+; AVX10-256-NEXT:    vpmovb2m %ymm4, %k2
+; AVX10-256-NEXT:    kshiftrd $1, %k1, %k1
+; AVX10-256-NEXT:    vptestnmb %ymm2, %ymm2, %k0 {%k2}
+; AVX10-256-NEXT:    kmovd %k0, %ecx
+; AVX10-256-NEXT:    vptestnmb %ymm3, %ymm3, %k0 {%k1}
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    shlq $32, %rax
+; AVX10-256-NEXT:    orq %rcx, %rax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <64 x i8> %x, zeroinitializer
   %b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
   %c = icmp eq <64 x i8> %y, zeroinitializer
@@ -492,6 +672,23 @@ define i8 @kshiftr_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_7:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    kandb %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <8 x i64> %x, zeroinitializer
   %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
   %c = icmp eq <8 x i64> %y, zeroinitializer
@@ -520,6 +717,20 @@ define i16 @kshiftr_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v16i1_15:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmd %ymm1, %ymm1, %k0
+; AVX10-256-NEXT:    kshiftlw $8, %k0, %k0
+; AVX10-256-NEXT:    kshiftrw $15, %k0, %k0
+; AVX10-256-NEXT:    vptestnmd %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmd %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckbw %k1, %k2, %k1
+; AVX10-256-NEXT:    kandw %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <16 x i32> %x, zeroinitializer
   %b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
   %c = icmp eq <16 x i32> %y, zeroinitializer
@@ -552,6 +763,19 @@ define i32 @kshiftr_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v32i1_31:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmw %ymm1, %ymm1, %k0
+; AVX10-256-NEXT:    kshiftld $16, %k0, %k0
+; AVX10-256-NEXT:    kshiftrd $31, %k0, %k0
+; AVX10-256-NEXT:    vptestnmw %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmw %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kunpckwd %k1, %k2, %k1
+; AVX10-256-NEXT:    kandd %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <32 x i16> %x, zeroinitializer
   %b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 63, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
   %c = icmp eq <32 x i16> %y, zeroinitializer
@@ -586,6 +810,15 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
 ; SKX-NEXT:    kmovq %k0, %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v64i1_63:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmb %ymm1, %ymm1, %k0
+; AVX10-256-NEXT:    kshiftrd $31, %k0, %k1
+; AVX10-256-NEXT:    vptestnmb %ymm2, %ymm2, %k0 {%k1}
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <64 x i8> %x, zeroinitializer
   %b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 127, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
   %c = icmp eq <64 x i8> %y, zeroinitializer
@@ -614,6 +847,23 @@ define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_zu123u56:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    kandb %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <8 x i64> %x, zeroinitializer
   %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 5, i32 6>
   %c = icmp eq <8 x i64> %y, zeroinitializer
@@ -642,6 +892,23 @@ define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) {
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftl_v8i1_u0123456:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kshiftlb $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    kandb %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <8 x i64> %x, zeroinitializer
   %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
   %c = icmp eq <8 x i64> %y, zeroinitializer
@@ -671,6 +938,23 @@ define i8 @kshiftr_v8i1_1u3u567z(<8 x i64> %x, <8 x i64> %y) {
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_1u3u567z:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kshiftrb $1, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    kandb %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <8 x i64> %x, zeroinitializer
   %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 6, i32 7, i32 8>
   %c = icmp eq <8 x i64> %y, zeroinitializer
@@ -699,6 +983,23 @@ define i8 @kshiftr_v8i1_234567uu(<8 x i64> %x, <8 x i64> %y) {
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: kshiftr_v8i1_234567uu:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestnmq %ymm0, %ymm0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm1, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kshiftrb $2, %k0, %k0
+; AVX10-256-NEXT:    vptestnmq %ymm2, %ymm2, %k1
+; AVX10-256-NEXT:    vptestnmq %ymm3, %ymm3, %k2
+; AVX10-256-NEXT:    kshiftlb $4, %k2, %k2
+; AVX10-256-NEXT:    korb %k2, %k1, %k1
+; AVX10-256-NEXT:    kandb %k1, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %a = icmp eq <8 x i64> %x, zeroinitializer
   %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10>
   %c = icmp eq <8 x i64> %y, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index f26bbb7e5c2bda..53f99431598bf1 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -3,8 +3,9 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,EVEX512,KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,EVEX512,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX,AVX512,AVX10-256
 
 define i1 @allones_v16i8_sign(<16 x i8> %arg) {
 ; SSE-LABEL: allones_v16i8_sign:
@@ -176,6 +177,15 @@ define i1 @allones_v64i8_sign(<64 x i8> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v64i8_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX10-256-NEXT:    vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <64 x i8> %arg, zeroinitializer
   %tmp1 = bitcast <64 x i1> %tmp to i64
   %tmp2 = icmp eq i64 %tmp1, -1
@@ -232,6 +242,15 @@ define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpmovb2m %ymm0, %k0
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <64 x i8> %arg, zeroinitializer
   %tmp1 = bitcast <64 x i1> %tmp to i64
   %tmp2 = icmp eq i64 %tmp1, 0
@@ -273,6 +292,13 @@ define i1 @allones_v8i16_sign(<8 x i16> %arg) {
 ; SKX-NEXT:    kortestb %k0, %k0
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v8i16_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <8 x i16> %arg, zeroinitializer
   %tmp1 = bitcast <8 x i1> %tmp to i8
   %tmp2 = icmp eq i8 %tmp1, -1
@@ -311,6 +337,13 @@ define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
 ; SKX-NEXT:    kortestb %k0, %k0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovw2m %xmm0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <8 x i16> %arg, zeroinitializer
   %tmp1 = bitcast <8 x i1> %tmp to i8
   %tmp2 = icmp eq i8 %tmp1, 0
@@ -364,6 +397,14 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v16i16_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <16 x i16> %arg, zeroinitializer
   %tmp1 = bitcast <16 x i1> %tmp to i16
   %tmp2 = icmp eq i16 %tmp1, -1
@@ -415,6 +456,14 @@ define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v16i16_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <16 x i16> %arg, zeroinitializer
   %tmp1 = bitcast <16 x i1> %tmp to i16
   %tmp2 = icmp eq i16 %tmp1, 0
@@ -479,6 +528,16 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v32i16_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT:    vpmovw2m %ymm1, %k1
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <32 x i16> %arg, zeroinitializer
   %tmp1 = bitcast <32 x i1> %tmp to i32
   %tmp2 = icmp eq i32 %tmp1, -1
@@ -539,6 +598,16 @@ define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovw2m %ymm0, %k0
+; AVX10-256-NEXT:    vpmovw2m %ymm1, %k1
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <32 x i16> %arg, zeroinitializer
   %tmp1 = bitcast <32 x i1> %tmp to i32
   %tmp2 = icmp eq i32 %tmp1, 0
@@ -697,6 +766,16 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v16i32_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovd2m %ymm0, %k0
+; AVX10-256-NEXT:    vpmovd2m %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <16 x i32> %arg, zeroinitializer
   %tmp1 = bitcast <16 x i1> %tmp to i16
   %tmp2 = icmp eq i16 %tmp1, -1
@@ -755,6 +834,16 @@ define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovd2m %ymm0, %k0
+; AVX10-256-NEXT:    vpmovd2m %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <16 x i32> %arg, zeroinitializer
   %tmp1 = bitcast <16 x i1> %tmp to i16
   %tmp2 = icmp eq i16 %tmp1, 0
@@ -872,6 +961,17 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v8i64_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovq2m %ymm0, %k0
+; AVX10-256-NEXT:    vpmovq2m %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <8 x i64> %arg, zeroinitializer
   %tmp1 = bitcast <8 x i1> %tmp to i8
   %tmp2 = icmp eq i8 %tmp1, -1
@@ -926,6 +1026,17 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_sign:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpmovq2m %ymm0, %k0
+; AVX10-256-NEXT:    vpmovq2m %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = icmp slt <8 x i64> %arg, zeroinitializer
   %tmp1 = bitcast <8 x i1> %tmp to i8
   %tmp2 = icmp eq i8 %tmp1, 0
@@ -963,6 +1074,13 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
 ; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v16i8_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1092,13 +1210,21 @@ define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v8i64_not:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    setne %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v8i64_not:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    setne %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_not:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vptest %ymm0, %ymm0
+; AVX10-256-NEXT:    setne %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %1 = icmp eq <8 x i64> %a0, zeroinitializer
   %2 = bitcast <8 x i1> %1 to i8
   %3 = icmp ne i8 %2, -1
@@ -1138,6 +1264,13 @@ define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
 ; SKX-NEXT:    vptest %xmm1, %xmm0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v16i8_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
+; AVX10-256-NEXT:    vptest %xmm1, %xmm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1191,6 +1324,14 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v32i8_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1298,6 +1439,16 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v64i8_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastb {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT:    vptestmb %ymm2, %ymm0, %k1
+; AVX10-256-NEXT:    vptestmb %ymm2, %ymm1, %k0 {%k1}
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -1343,13 +1494,22 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v64i8_and1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v64i8_and1:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    sete %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
+; AVX10-256-NEXT:    vptest %ymm1, %ymm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -1394,6 +1554,13 @@ define i1 @allones_v8i16_and1(<8 x i16> %arg) {
 ; SKX-NEXT:    kortestb %k0, %k0
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v8i16_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1434,6 +1601,13 @@ define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
 ; SKX-NEXT:    vptest %xmm1, %xmm0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
+; AVX10-256-NEXT:    vptest %xmm1, %xmm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1493,6 +1667,14 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v16i16_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1565,6 +1747,17 @@ define i1 @allones_v32i16_and1(<32 x i16> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v32i16_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT:    vptestmw %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    vptestmw %ymm2, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1610,13 +1803,22 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v32i16_and1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v32i16_and1:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    sete %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
+; AVX10-256-NEXT:    vptest %ymm1, %ymm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -1704,6 +1906,14 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) {
 ; SKX-NEXT:    cmpb $15, %al
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v4i32_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $15, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
   %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -1744,6 +1954,13 @@ define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
 ; SKX-NEXT:    vptest %xmm1, %xmm0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v4i32_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
+; AVX10-256-NEXT:    vptest %xmm1, %xmm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
   %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -1800,6 +2017,14 @@ define i1 @allones_v8i32_and1(<8 x i32> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v8i32_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -1898,13 +2123,24 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allones_v16i32_and1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    setb %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allones_v16i32_and1:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    setb %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v16i32_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX10-256-NEXT:    vptestmd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    vptestmd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1950,13 +2186,22 @@ define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v16i32_and1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v16i32_and1:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    sete %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
+; AVX10-256-NEXT:    vptest %ymm1, %ymm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -1999,6 +2244,14 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) {
 ; SKX-NEXT:    cmpb $3, %al
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v2i64_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $3, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <2 x i64> %arg, <i64 1, i64 1>
   %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -2039,6 +2292,13 @@ define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
 ; SKX-NEXT:    vptest %xmm1, %xmm0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v2i64_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX10-256-NEXT:    vptest %xmm1, %xmm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <2 x i64> %arg, <i64 1, i64 1>
   %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -2095,6 +2355,15 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) {
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v4i64_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $15, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
   %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -2208,6 +2477,18 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v8i64_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
+; AVX10-256-NEXT:    vptestmq %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    vptestmq %ymm2, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
   %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2253,14 +2534,23 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v8i64_and1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
-; AVX512-NEXT:    vptestmd %zmm1, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v8i64_and1:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
+; EVEX512-NEXT:    vptestmd %zmm1, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    sete %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_and1:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
+; AVX10-256-NEXT:    vptest %ymm1, %ymm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
   %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2299,6 +2589,13 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
 ; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v16i8_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
   %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2339,6 +2636,13 @@ define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
 ; SKX-NEXT:    vptest %xmm1, %xmm0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v16i8_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [289360691352306692,289360691352306692]
+; AVX10-256-NEXT:    vptest %xmm1, %xmm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
   %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2392,6 +2696,14 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v32i8_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
   %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2499,6 +2811,16 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v64i8_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastb {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT:    vptestmb %ymm2, %ymm0, %k1
+; AVX10-256-NEXT:    vptestmb %ymm2, %ymm1, %k0 {%k1}
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
   %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -2544,13 +2866,22 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v64i8_and4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v64i8_and4:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    sete %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v64i8_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [289360691352306692,289360691352306692,289360691352306692,289360691352306692]
+; AVX10-256-NEXT:    vptest %ymm1, %ymm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
   %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
   %tmp2 = bitcast <64 x i1> %tmp1 to i64
@@ -2595,6 +2926,13 @@ define i1 @allones_v8i16_and4(<8 x i16> %arg) {
 ; SKX-NEXT:    kortestb %k0, %k0
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v8i16_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
   %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2635,6 +2973,13 @@ define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
 ; SKX-NEXT:    vptest %xmm1, %xmm0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v8i16_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1125917086973956,1125917086973956]
+; AVX10-256-NEXT:    vptest %xmm1, %xmm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
   %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -2694,6 +3039,14 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v16i16_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
   %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -2766,6 +3119,17 @@ define i1 @allones_v32i16_and4(<32 x i16> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v32i16_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT:    vptestmw %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    vptestmw %ymm2, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckwd %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestd %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
   %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2811,13 +3175,22 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v32i16_and4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v32i16_and4:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    sete %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v32i16_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1125917086973956,1125917086973956,1125917086973956,1125917086973956]
+; AVX10-256-NEXT:    vptest %ymm1, %ymm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
   %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
   %tmp2 = bitcast <32 x i1> %tmp1 to i32
@@ -2905,6 +3278,14 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) {
 ; SKX-NEXT:    cmpb $15, %al
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v4i32_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $15, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
   %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -2945,6 +3326,13 @@ define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
 ; SKX-NEXT:    vptest %xmm1, %xmm0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v4i32_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [17179869188,17179869188]
+; AVX10-256-NEXT:    vptest %xmm1, %xmm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
   %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -3001,6 +3389,14 @@ define i1 @allones_v8i32_and4(<8 x i32> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v8i32_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3099,13 +3495,24 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allones_v16i32_and4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    setb %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allones_v16i32_and4:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    setb %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v16i32_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
+; AVX10-256-NEXT:    vptestmd %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    vptestmd %ymm2, %ymm1, %k1
+; AVX10-256-NEXT:    kunpckbw %k0, %k1, %k0
+; AVX10-256-NEXT:    kortestw %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -3151,13 +3558,22 @@ define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v16i32_and4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v16i32_and4:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    sete %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v16i32_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [17179869188,17179869188,17179869188,17179869188]
+; AVX10-256-NEXT:    vptest %ymm1, %ymm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
   %tmp2 = bitcast <16 x i1> %tmp1 to i16
@@ -3200,6 +3616,14 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) {
 ; SKX-NEXT:    cmpb $3, %al
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v2i64_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $3, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <2 x i64> %arg, <i64 4, i64 4>
   %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -3240,6 +3664,13 @@ define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
 ; SKX-NEXT:    vptest %xmm1, %xmm0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v2i64_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4,4]
+; AVX10-256-NEXT:    vptest %xmm1, %xmm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %tmp = and <2 x i64> %arg, <i64 4, i64 4>
   %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <2 x i1> %tmp1 to i2
@@ -3296,6 +3727,15 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) {
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v4i64_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $15, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
   %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <4 x i1> %tmp1 to i4
@@ -3409,6 +3849,18 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) {
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allones_v8i64_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [4,4,4,4]
+; AVX10-256-NEXT:    vptestmq %ymm2, %ymm0, %k0
+; AVX10-256-NEXT:    vptestmq %ymm2, %ymm1, %k1
+; AVX10-256-NEXT:    kshiftlb $4, %k1, %k1
+; AVX10-256-NEXT:    korb %k1, %k0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setb %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
   %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3454,14 +3906,23 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: allzeros_v8i64_and4:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4]
-; AVX512-NEXT:    vptestmd %zmm1, %zmm0, %k0
-; AVX512-NEXT:    kortestw %k0, %k0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; EVEX512-LABEL: allzeros_v8i64_and4:
+; EVEX512:       # %bb.0:
+; EVEX512-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4]
+; EVEX512-NEXT:    vptestmd %zmm1, %zmm0, %k0
+; EVEX512-NEXT:    kortestw %k0, %k0
+; EVEX512-NEXT:    sete %al
+; EVEX512-NEXT:    vzeroupper
+; EVEX512-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v8i64_and4:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX10-256-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
+; AVX10-256-NEXT:    vptest %ymm1, %ymm0
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
   %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
   %tmp2 = bitcast <8 x i1> %tmp1 to i8
@@ -3510,6 +3971,15 @@ define i1 @allzeros_v8f32_nnan(<8 x float> %a0) {
 ; SKX-NEXT:    setne %al
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: allzeros_v8f32_nnan:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX10-256-NEXT:    vcmpneqps %ymm1, %ymm0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setne %al
+; AVX10-256-NEXT:    vzeroupper
+; AVX10-256-NEXT:    retq
   %1 = fcmp nnan une <8 x float> %a0, zeroinitializer
   %2 = bitcast <8 x i1> %1 to i8
   %3 = icmp ne i8 %2, 0
@@ -3709,6 +4179,20 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; SKX-NEXT:    andb %cl, %al
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v16i8:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX10-256-NEXT:    kshiftrw $15, %k0, %k1
+; AVX10-256-NEXT:    kmovd %k1, %ecx
+; AVX10-256-NEXT:    kshiftrw $8, %k0, %k1
+; AVX10-256-NEXT:    kmovd %k1, %edx
+; AVX10-256-NEXT:    kshiftrw $3, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    xorb %dl, %al
+; AVX10-256-NEXT:    andb %cl, %al
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    retq
   %cmp = icmp eq <16 x i8> %x, %y
   %e1 = extractelement <16 x i1> %cmp, i32 3
   %e2 = extractelement <16 x i1> %cmp, i32 8
@@ -3758,6 +4242,15 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SKX-NEXT:    testb $-109, %al
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v8i16:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
+; AVX10-256-NEXT:    knotb %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    testb $-109, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %cmp = icmp sgt <8 x i16> %x, %y
   %e1 = extractelement <8 x i1> %cmp, i32 0
   %e2 = extractelement <8 x i1> %cmp, i32 1
@@ -3819,6 +4312,17 @@ define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SKX-NEXT:    xorb %cl, %al
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v4i32:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT:    kshiftrb $3, %k0, %k1
+; AVX10-256-NEXT:    kmovd %k1, %ecx
+; AVX10-256-NEXT:    kshiftrb $2, %k0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    xorb %cl, %al
+; AVX10-256-NEXT:    # kill: def $al killed $al killed $eax
+; AVX10-256-NEXT:    retq
   %cmp = icmp slt <4 x i32> %x, %y
   %e1 = extractelement <4 x i1> %cmp, i32 2
   %e2 = extractelement <4 x i1> %cmp, i32 3
@@ -3870,6 +4374,14 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SKX-NEXT:    cmpb $3, %al
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_and_v2i64:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vpcmpneqq %xmm1, %xmm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $3, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %cmp = icmp ne <2 x i64> %x, %y
   %e1 = extractelement <2 x i1> %cmp, i32 0
   %e2 = extractelement <2 x i1> %cmp, i32 1
@@ -3946,6 +4458,14 @@ define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
 ; SKX-NEXT:    testb $14, %al
 ; SKX-NEXT:    setne %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v4f32:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vcmpeq_uqps %xmm1, %xmm0, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    testb $14, %al
+; AVX10-256-NEXT:    setne %al
+; AVX10-256-NEXT:    retq
   %cmp = fcmp ueq <4 x float> %x, %y
   %e1 = extractelement <4 x i1> %cmp, i32 1
   %e2 = extractelement <4 x i1> %cmp, i32 2
@@ -3991,6 +4511,14 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
 ; SKX-NEXT:    cmpb $3, %al
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_and_v2f64:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $3, %al
+; AVX10-256-NEXT:    sete %al
+; AVX10-256-NEXT:    retq
   %cmp = fcmp oge <2 x double> %x, %y
   %e1 = extractelement <2 x i1> %cmp, i32 0
   %e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4031,6 +4559,13 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
 ; SKX-NEXT:    kortestb %k0, %k0
 ; SKX-NEXT:    setne %al
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_or_v2f64:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    setne %al
+; AVX10-256-NEXT:    retq
   %cmp = fcmp oge <2 x double> %x, %y
   %e1 = extractelement <2 x i1> %cmp, i32 0
   %e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4074,6 +4609,16 @@ define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) {
 ; SKX-NEXT:    andl $15, %edi
 ; SKX-NEXT:    movzbl -24(%rsp,%rdi), %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v16i8_var:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX10-256-NEXT:    vpmovm2b %k0, %xmm0
+; AVX10-256-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT:    andl $15, %edi
+; AVX10-256-NEXT:    movzbl -24(%rsp,%rdi), %eax
+; AVX10-256-NEXT:    retq
   %cmp = icmp eq <16 x i8> %x, %y
   %val = extractelement <16 x i1> %cmp, i32 %z
   ret i1 %val
@@ -4121,6 +4666,16 @@ define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) {
 ; SKX-NEXT:    andl $7, %edi
 ; SKX-NEXT:    movzbl -24(%rsp,%rdi,2), %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v8i16_var:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
+; AVX10-256-NEXT:    vpmovm2w %k0, %xmm0
+; AVX10-256-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT:    andl $7, %edi
+; AVX10-256-NEXT:    movzbl -24(%rsp,%rdi,2), %eax
+; AVX10-256-NEXT:    retq
   %cmp = icmp sgt <8 x i16> %x, %y
   %val = extractelement <8 x i1> %cmp, i32 %z
   ret i1 %val
@@ -4165,6 +4720,16 @@ define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) {
 ; SKX-NEXT:    andl $3, %edi
 ; SKX-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v4i32_var:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT:    vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT:    andl $3, %edi
+; AVX10-256-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
+; AVX10-256-NEXT:    retq
   %cmp = icmp slt <4 x i32> %x, %y
   %val = extractelement <4 x i1> %cmp, i32 %z
   ret i1 %val
@@ -4222,6 +4787,16 @@ define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) {
 ; SKX-NEXT:    andl $1, %edi
 ; SKX-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v2i64_var:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT:    vpcmpneqq %xmm1, %xmm0, %k0
+; AVX10-256-NEXT:    vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT:    andl $1, %edi
+; AVX10-256-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
+; AVX10-256-NEXT:    retq
   %cmp = icmp ne <2 x i64> %x, %y
   %val = extractelement <2 x i1> %cmp, i32 %z
   ret i1 %val
@@ -4269,6 +4844,16 @@ define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) {
 ; SKX-NEXT:    andl $3, %edi
 ; SKX-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v4f32_var:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT:    vcmpeq_uqps %xmm1, %xmm0, %k0
+; AVX10-256-NEXT:    vpmovm2d %k0, %xmm0
+; AVX10-256-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT:    andl $3, %edi
+; AVX10-256-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
+; AVX10-256-NEXT:    retq
   %cmp = fcmp ueq <4 x float> %x, %y
   %val = extractelement <4 x i1> %cmp, i32 %z
   ret i1 %val
@@ -4313,6 +4898,16 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
 ; SKX-NEXT:    andl $1, %edi
 ; SKX-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: movmsk_v2f64_var:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    # kill: def $edi killed $edi def $rdi
+; AVX10-256-NEXT:    vcmplepd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT:    vpmovm2q %k0, %xmm0
+; AVX10-256-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX10-256-NEXT:    andl $1, %edi
+; AVX10-256-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
+; AVX10-256-NEXT:    retq
   %cmp = fcmp oge <2 x double> %x, %y
   %val = extractelement <2 x i1> %cmp, i32 %z
   ret i1 %val
@@ -4371,6 +4966,18 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
 ; SKX-NEXT:    testb $1, %cl
 ; SKX-NEXT:    cmovel %edx, %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: PR39665_c_ray:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT:    kmovd %k0, %ecx
+; AVX10-256-NEXT:    testb $2, %cl
+; AVX10-256-NEXT:    movl $42, %eax
+; AVX10-256-NEXT:    movl $99, %edx
+; AVX10-256-NEXT:    cmovel %edx, %eax
+; AVX10-256-NEXT:    testb $1, %cl
+; AVX10-256-NEXT:    cmovel %edx, %eax
+; AVX10-256-NEXT:    retq
   %cmp = fcmp ogt <2 x double> %x, %y
   %e1 = extractelement <2 x i1> %cmp, i32 0
   %e2 = extractelement <2 x i1> %cmp, i32 1
@@ -4423,6 +5030,16 @@ define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) {
 ; SKX-NEXT:    movl $99, %eax
 ; SKX-NEXT:    cmovel %ecx, %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: PR39665_c_ray_opt:
+; AVX10-256:       # %bb.0:
+; AVX10-256-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    cmpb $3, %al
+; AVX10-256-NEXT:    movl $42, %ecx
+; AVX10-256-NEXT:    movl $99, %eax
+; AVX10-256-NEXT:    cmovel %ecx, %eax
+; AVX10-256-NEXT:    retq
   %cmp = fcmp ogt <2 x double> %x, %y
   %shift = shufflevector <2 x i1> %cmp, <2 x i1> poison, <2 x i32> <i32 1, i32 undef>
   %1 = and <2 x i1> %cmp, %shift
@@ -4551,6 +5168,25 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
 ; SKX-NEXT:    movw $0, 0
 ; SKX-NEXT:    xorl %eax, %eax
 ; SKX-NEXT:    retq
+;
+; AVX10-256-LABEL: pr67287:
+; AVX10-256:       # %bb.0: # %entry
+; AVX10-256-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX10-256-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX10-256-NEXT:    vptestnmq %xmm0, %xmm0, %k0
+; AVX10-256-NEXT:    kortestb %k0, %k0
+; AVX10-256-NEXT:    jne .LBB97_2
+; AVX10-256-NEXT:  # %bb.1: # %entry
+; AVX10-256-NEXT:    kmovd %k0, %eax
+; AVX10-256-NEXT:    testb $1, %al
+; AVX10-256-NEXT:    jne .LBB97_2
+; AVX10-256-NEXT:  # %bb.3: # %middle.block
+; AVX10-256-NEXT:    xorl %eax, %eax
+; AVX10-256-NEXT:    retq
+; AVX10-256-NEXT:  .LBB97_2:
+; AVX10-256-NEXT:    movw $0, 0
+; AVX10-256-NEXT:    xorl %eax, %eax
+; AVX10-256-NEXT:    retq
 entry:
   %0 = and <2 x i64> %broadcast.splatinsert25, <i64 4294967295, i64 4294967295>
   %1 = icmp eq <2 x i64> %0, zeroinitializer
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index d025fe955be515..d7183cf47eb13a 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -939,7 +939,8 @@ std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
       }
       break;
     case 64:
-      if (STI.getFeatureBits()[X86::FeatureBWI]) {
+      if (STI.getFeatureBits()[X86::FeatureBWI] &&
+          STI.getFeatureBits()[X86::FeatureEVEX512]) {
         ConstantInliner CI(Value);
         return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVQkm);
       }
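
For reference, the predicate pattern in the llvm-exegesis hunk above generalizes to any 64-bit opmask use. A minimal sketch, assuming the in-tree llvm::MCSubtargetInfo API and the X86 feature enums used in the hunk; the helper name canUseQuadwordOpmask is hypothetical and not part of this patch:

  #include "llvm/MC/MCSubtargetInfo.h"
  // Assumes the X86 target headers defining X86::FeatureBWI and
  // X86::FeatureEVEX512 (from X86GenSubtargetInfo) are visible.

  static bool canUseQuadwordOpmask(const llvm::MCSubtargetInfo &STI) {
    // 64-bit mask-register instructions (KMOVQ, KUNPCKDQ, ...) need
    // both AVX512BW and 512-bit vector support. AVX10.1-256 sets BWI
    // but not EVEX512, so the predicate is false there.
    return STI.getFeatureBits()[X86::FeatureBWI] &&
           STI.getFeatureBits()[X86::FeatureEVEX512];
  }

When the predicate is false, the AVX10-256 check lines above show the fallback codegen: two 256-bit mask results are combined with kunpckwd/kunpckbw, with kshiftlb plus korb, or via a masked vptestmb, so no 64-bit opmask instruction is ever emitted.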


