[llvm] r315274 - [AVX512] Add patterns to commute integer comparison instructions during isel.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 9 23:36:46 PDT 2017


Author: ctopper
Date: Mon Oct  9 23:36:46 2017
New Revision: 315274

URL: http://llvm.org/viewvc/llvm-project?rev=315274&view=rev
Log:
[AVX512] Add patterns to commute integer comparison instructions during isel.

This enables broadcast loads to be commuted and allows normal loads to be folded without the peephole pass.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=315274&r1=315273&r2=315274&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Oct  9 23:36:46 2017
@@ -1998,6 +1998,24 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_v
                       avx512vl_i64_info, HasAVX512>,
                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
+// Transform that swizzles a comparison immediate for swapped operands, so a
+// memory operand appearing as the first operand can still be matched.
+def CommutePCMPCC : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue() & 0x7; // Only the low 3 bits are examined.
+  switch (Imm) {
+  default: llvm_unreachable("Unreachable!"); // All 8 masked values are cases.
+  case 0x01: Imm = 0x06; break; // LT  -> NLE
+  case 0x02: Imm = 0x05; break; // LE  -> NLT
+  case 0x05: Imm = 0x02; break; // NLT -> LE
+  case 0x06: Imm = 0x01; break; // NLE -> LT
+  case 0x00: // EQ    (immediate left unchanged)
+  case 0x03: // FALSE (immediate left unchanged)
+  case 0x04: // NE    (immediate left unchanged)
+  case 0x07: // TRUE  (immediate left unchanged)
+    break;
+  }
+  return getI8Imm(Imm, SDLoc(N));
+}]>;
 
 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                           X86VectorVTInfo _> {
@@ -2069,6 +2087,17 @@ multiclass avx512_icmp_cc<bits<8> opc, s
                           "$dst {${mask}}, $src1, $src2, $cc}"),
                [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
   }
+
+  def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
+                    (_.VT _.RC:$src1), imm:$cc),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
+                                                      (CommutePCMPCC imm:$cc))>;
+
+  def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
+                                        (_.VT _.RC:$src1), imm:$cc)),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
+                                                       _.RC:$src1, addr:$src2,
+                                                       (CommutePCMPCC imm:$cc))>;
 }
 
 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
@@ -2113,6 +2142,18 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
   }
+
+  def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+                    (_.VT _.RC:$src1), imm:$cc),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
+                                                       (CommutePCMPCC imm:$cc))>;
+
+  def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
+                                         (_.ScalarLdFrag addr:$src2)),
+                                        (_.VT _.RC:$src1), imm:$cc)),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
+                                                       _.RC:$src1, addr:$src2,
+                                                       (CommutePCMPCC imm:$cc))>;
 }
 
 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=315274&r1=315273&r2=315274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Mon Oct  9 23:36:46 2017
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
 
 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
@@ -29317,8 +29317,7 @@ entry:
 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %xmm1
-; VLX-NEXT:    vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; VLX-NEXT:    retq
@@ -29379,9 +29378,8 @@ entry:
 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; VLX-NEXT:    retq
@@ -29735,8 +29733,7 @@ entry:
 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %xmm1
-; VLX-NEXT:    vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    retq
@@ -29796,9 +29793,8 @@ entry:
 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    retq
@@ -30095,8 +30091,7 @@ entry:
 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %xmm1
-; VLX-NEXT:    vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
 ;
@@ -30142,9 +30137,8 @@ entry:
 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
 ;
@@ -30451,8 +30445,7 @@ entry:
 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %xmm1
-; VLX-NEXT:    vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to4}, %xmm0, %k0
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
 ;
@@ -30504,9 +30497,8 @@ entry:
 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
 ;
@@ -30705,8 +30697,7 @@ entry:
 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %ymm1
-; VLX-NEXT:    vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    vzeroupper
@@ -30737,9 +30728,8 @@ entry:
 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %ymm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    vzeroupper
@@ -31077,8 +31067,7 @@ entry:
 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %ymm1
-; VLX-NEXT:    vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -31153,9 +31142,8 @@ entry:
 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %ymm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -31557,8 +31545,7 @@ entry:
 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %ymm1
-; VLX-NEXT:    vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to8}, %ymm0, %k0
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -31638,9 +31625,8 @@ entry:
 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %ymm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -32199,8 +32185,7 @@ entry:
 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %zmm1
-; VLX-NEXT:    vpcmpled %zmm0, %zmm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -32224,8 +32209,7 @@ define zeroext i32 @test_vpcmpsged_v16i1
 ; NoVLX-NEXT:    .cfi_offset %r13, -40
 ; NoVLX-NEXT:    .cfi_offset %r14, -32
 ; NoVLX-NEXT:    .cfi_offset %r15, -24
-; NoVLX-NEXT:    vpbroadcastd (%rdi), %zmm1
-; NoVLX-NEXT:    vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
 ; NoVLX-NEXT:    kxorw %k0, %k0, %k1
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
 ; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
@@ -32319,9 +32303,8 @@ entry:
 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %zmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -32345,9 +32328,8 @@ define zeroext i32 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    .cfi_offset %r13, -40
 ; NoVLX-NEXT:    .cfi_offset %r14, -32
 ; NoVLX-NEXT:    .cfi_offset %r15, -24
-; NoVLX-NEXT:    vpbroadcastd (%rsi), %zmm1
 ; NoVLX-NEXT:    kmovw %edi, %k1
-; NoVLX-NEXT:    vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
 ; NoVLX-NEXT:    kxorw %k0, %k0, %k1
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
 ; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
@@ -32939,8 +32921,7 @@ entry:
 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rdi), %zmm1
-; VLX-NEXT:    vpcmpled %zmm0, %zmm1, %k0
+; VLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -32964,8 +32945,7 @@ define zeroext i64 @test_vpcmpsged_v16i1
 ; NoVLX-NEXT:    .cfi_offset %r13, -40
 ; NoVLX-NEXT:    .cfi_offset %r14, -32
 ; NoVLX-NEXT:    .cfi_offset %r15, -24
-; NoVLX-NEXT:    vpbroadcastd (%rdi), %zmm1
-; NoVLX-NEXT:    vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT:    vpcmpnltd (%rdi){1to16}, %zmm0, %k0
 ; NoVLX-NEXT:    kxorw %k0, %k0, %k1
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
@@ -33064,9 +33044,8 @@ entry:
 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastd (%rsi), %zmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -33090,9 +33069,8 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    .cfi_offset %r13, -40
 ; NoVLX-NEXT:    .cfi_offset %r14, -32
 ; NoVLX-NEXT:    .cfi_offset %r15, -24
-; NoVLX-NEXT:    vpbroadcastd (%rsi), %zmm1
 ; NoVLX-NEXT:    kmovw %edi, %k1
-; NoVLX-NEXT:    vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT:    vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
 ; NoVLX-NEXT:    kxorw %k0, %k0, %k1
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
@@ -33343,8 +33321,7 @@ entry:
 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %xmm1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
 ; VLX-NEXT:    kmovb %k0, -{{[0-9]+}}(%rsp)
 ; VLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; VLX-NEXT:    retq
@@ -33377,9 +33354,8 @@ entry:
 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovb %k0, -{{[0-9]+}}(%rsp)
 ; VLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; VLX-NEXT:    retq
@@ -33621,8 +33597,7 @@ entry:
 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %xmm1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; VLX-NEXT:    retq
@@ -33667,9 +33642,8 @@ entry:
 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; VLX-NEXT:    retq
@@ -33919,8 +33893,7 @@ entry:
 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %xmm1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    retq
@@ -33964,9 +33937,8 @@ entry:
 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    retq
@@ -34223,8 +34195,7 @@ entry:
 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %xmm1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
 ;
@@ -34270,9 +34241,8 @@ entry:
 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    retq
 ;
@@ -34555,8 +34525,7 @@ entry:
 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %xmm1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to2}, %xmm0, %k0
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
 ;
@@ -34608,9 +34577,8 @@ entry:
 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %xmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    retq
 ;
@@ -34963,8 +34931,7 @@ entry:
 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %ymm1
-; VLX-NEXT:    vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; VLX-NEXT:    vzeroupper
@@ -35027,9 +34994,8 @@ entry:
 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %ymm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; VLX-NEXT:    vzeroupper
@@ -35399,8 +35365,7 @@ entry:
 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %ymm1
-; VLX-NEXT:    vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    vzeroupper
@@ -35462,9 +35427,8 @@ entry:
 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %ymm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    vzeroupper
@@ -35777,8 +35741,7 @@ entry:
 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %ymm1
-; VLX-NEXT:    vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -35826,9 +35789,8 @@ entry:
 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %ymm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -36151,8 +36113,7 @@ entry:
 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %ymm1
-; VLX-NEXT:    vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -36206,9 +36167,8 @@ entry:
 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %ymm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -36395,8 +36355,7 @@ entry:
 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %zmm1
-; VLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    vzeroupper
@@ -36404,8 +36363,7 @@ define zeroext i16 @test_vpcmpsgeq_v8i1_
 ;
 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
 ; NoVLX:       # BB#0: # %entry
-; NoVLX-NEXT:    vpbroadcastq (%rdi), %zmm1
-; NoVLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
 ; NoVLX-NEXT:    kmovw %k0, %eax
 ; NoVLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; NoVLX-NEXT:    vzeroupper
@@ -36424,9 +36382,8 @@ entry:
 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %zmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; VLX-NEXT:    vzeroupper
@@ -36434,9 +36391,8 @@ define zeroext i16 @test_masked_vpcmpsge
 ;
 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
 ; NoVLX:       # BB#0: # %entry
-; NoVLX-NEXT:    vpbroadcastq (%rsi), %zmm1
 ; NoVLX-NEXT:    kmovw %edi, %k1
-; NoVLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
 ; NoVLX-NEXT:    kmovw %k0, %eax
 ; NoVLX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; NoVLX-NEXT:    vzeroupper
@@ -36753,8 +36709,7 @@ entry:
 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %zmm1
-; VLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -36768,8 +36723,7 @@ define zeroext i32 @test_vpcmpsgeq_v8i1_
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
 ; NoVLX-NEXT:    subq $32, %rsp
-; NoVLX-NEXT:    vpbroadcastq (%rdi), %zmm1
-; NoVLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
 ; NoVLX-NEXT:    kxorw %k0, %k0, %k1
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
 ; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
@@ -36828,9 +36782,8 @@ entry:
 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %zmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovd %k0, %eax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -36844,9 +36797,8 @@ define zeroext i32 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
 ; NoVLX-NEXT:    subq $32, %rsp
-; NoVLX-NEXT:    vpbroadcastq (%rsi), %zmm1
 ; NoVLX-NEXT:    kmovw %edi, %k1
-; NoVLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
 ; NoVLX-NEXT:    kxorw %k0, %k0, %k1
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
 ; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
@@ -37223,8 +37175,7 @@ entry:
 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rdi), %zmm1
-; VLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -37238,8 +37189,7 @@ define zeroext i64 @test_vpcmpsgeq_v8i1_
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
 ; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    vpbroadcastq (%rdi), %zmm1
-; NoVLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
 ; NoVLX-NEXT:    kxorw %k0, %k0, %k1
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
@@ -37303,9 +37253,8 @@ entry:
 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
 ; VLX:       # BB#0: # %entry
-; VLX-NEXT:    vpbroadcastq (%rsi), %zmm1
 ; VLX-NEXT:    kmovd %edi, %k1
-; VLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
 ; VLX-NEXT:    kmovq %k0, %rax
 ; VLX-NEXT:    vzeroupper
 ; VLX-NEXT:    retq
@@ -37319,9 +37268,8 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
 ; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    vpbroadcastq (%rsi), %zmm1
 ; NoVLX-NEXT:    kmovw %edi, %k1
-; NoVLX-NEXT:    vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
 ; NoVLX-NEXT:    kxorw %k0, %k0, %k1
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
 ; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)




More information about the llvm-commits mailing list