[llvm] r323572 - [X86] Use vpternlog to implement vector not under AVX512.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 26 14:17:40 PST 2018


Author: ctopper
Date: Fri Jan 26 14:17:40 2018
New Revision: 323572

URL: http://llvm.org/viewvc/llvm-project?rev=323572&view=rev
Log:
[X86] Use vpternlog to implement vector not under AVX512.

Previously we had to materialize all 1s in a register using vpternlog or pcmpeq and then xor with that. By using vpternlog directly we can do it in one operation.

This is implemented using isel patterns, but we should maybe consider creating a generalized vpternlog combiner.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-ext.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
    llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
    llvm/trunk/test/CodeGen/X86/vec_cmp_sint-128.ll
    llvm/trunk/test/CodeGen/X86/vec_cmp_uint-128.ll
    llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=323572&r1=323571&r2=323572&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Jan 26 14:17:40 2018
@@ -10207,6 +10207,42 @@ defm VPTERNLOGD : avx512_common_ternlog<
 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
                                         avx512vl_i64_info>, VEX_W;
 
+
+// Patterns to implement vnot using vpternlog instead of creating all ones
+// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
+// so that the result is only dependent on src0. But we use the same source
+// for all operands to prevent a false dependency.
+// TODO: We should maybe have a more generalized algorithm for folding to
+// vpternlog.
+let Predicates = [HasAVX512] in {
+  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
+            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+}
+
+let Predicates = [HasAVX512, NoVLX] in {
+  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
+            (EXTRACT_SUBREG
+             (VPTERNLOGQZrri
+              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+              (i8 15)), sub_xmm)>;
+  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
+            (EXTRACT_SUBREG
+             (VPTERNLOGQZrri
+              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+              (i8 15)), sub_ymm)>;
+}
+
+let Predicates = [HasVLX] in {
+  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
+            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
+            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+}
+
 //===----------------------------------------------------------------------===//
 // AVX-512 - FixupImm
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=323572&r1=323571&r2=323572&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Fri Jan 26 14:17:40 2018
@@ -1409,8 +1409,8 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32
 ; KNL-LABEL: sext_8i1_8i32:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
-; KNL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    # kill: def %ymm0 killed %ymm0 killed %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: sext_8i1_8i32:

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=323572&r1=323571&r2=323572&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Fri Jan 26 14:17:40 2018
@@ -10804,8 +10804,7 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -10832,8 +10831,7 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -10861,8 +10859,7 @@ define zeroext i32 @test_masked_vpcmpsge
 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -10893,8 +10890,7 @@ define zeroext i32 @test_masked_vpcmpsge
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -10925,8 +10921,7 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -10954,8 +10949,7 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -10984,8 +10978,7 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11017,8 +11010,7 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11051,8 +11043,7 @@ define zeroext i64 @test_vpcmpsgeb_v32i1
 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
 ; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
 ; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
@@ -11087,8 +11078,7 @@ define zeroext i64 @test_vpcmpsgeb_v32i1
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
 ; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
 ; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
 ; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
@@ -11126,8 +11116,7 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    shrl $16, %edi
 ; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
@@ -11168,8 +11157,7 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    shrl $16, %edi
 ; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
 ; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
@@ -11208,8 +11196,7 @@ define zeroext i16 @test_vpcmpsgew_v8i1_
 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -11238,8 +11225,7 @@ define zeroext i16 @test_vpcmpsgew_v8i1_
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -11269,8 +11255,7 @@ define zeroext i16 @test_masked_vpcmpsge
 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11303,8 +11288,7 @@ define zeroext i16 @test_masked_vpcmpsge
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11336,8 +11320,7 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -11364,8 +11347,7 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -11393,8 +11375,7 @@ define zeroext i32 @test_masked_vpcmpsge
 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11425,8 +11406,7 @@ define zeroext i32 @test_masked_vpcmpsge
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11457,8 +11437,7 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -11486,8 +11465,7 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -11516,8 +11494,7 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11549,8 +11526,7 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
 ; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11583,8 +11559,7 @@ define zeroext i32 @test_vpcmpsgew_v16i1
 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -11612,8 +11587,7 @@ define zeroext i32 @test_vpcmpsgew_v16i1
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -11642,8 +11616,7 @@ define zeroext i32 @test_masked_vpcmpsge
 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11675,8 +11648,7 @@ define zeroext i32 @test_masked_vpcmpsge
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11708,8 +11680,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1
 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -11738,8 +11709,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -11769,8 +11739,7 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -11803,8 +11772,7 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX:       # %bb.0: # %entry
 ; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k1
@@ -12008,14 +11976,13 @@ define zeroext i64 @test_vpcmpsgew_v32i1
 ; NoVLX-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm2
 ; NoVLX-NEXT:    vinserti128 $1, %xmm5, %ymm2, %ymm2
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm2, %ymm0
-; NoVLX-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; NoVLX-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; NoVLX-NEXT:    kmovw %k0, %ecx
-; NoVLX-NEXT:    vpxor %ymm2, %ymm1, %ymm0
-; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; NoVLX-NEXT:    kmovw %k0, %eax
@@ -12131,14 +12098,13 @@ define zeroext i64 @test_vpcmpsgew_v32i1
 ; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm2, %ymm0
 ; NoVLX-NEXT:    vmovdqa 32(%rdi), %ymm2
 ; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm2, %ymm1
-; NoVLX-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; NoVLX-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; NoVLX-NEXT:    kmovw %k0, %ecx
-; NoVLX-NEXT:    vpxor %ymm2, %ymm1, %ymm0
-; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; NoVLX-NEXT:    kmovw %k0, %eax
@@ -12341,11 +12307,10 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    vpinsrw $7, %eax, %xmm4, %xmm1
 ; NoVLX-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; NoVLX-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; NoVLX-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
 ; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    kmovw %edi, %k2
@@ -12422,27 +12387,27 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
 ; NoVLX-NEXT:    vmovq %xmm4, %rcx
 ; NoVLX-NEXT:    shrq $48, %rax
-; NoVLX-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm3
 ; NoVLX-NEXT:    movl %ecx, %eax
 ; NoVLX-NEXT:    shrl $16, %eax
-; NoVLX-NEXT:    vmovd %ecx, %xmm3
-; NoVLX-NEXT:    vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT:    vmovd %ecx, %xmm0
+; NoVLX-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
 ; NoVLX-NEXT:    movq %rcx, %rax
 ; NoVLX-NEXT:    shrq $32, %rax
-; NoVLX-NEXT:    vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
 ; NoVLX-NEXT:    vpextrq $1, %xmm4, %rax
 ; NoVLX-NEXT:    shrq $48, %rcx
-; NoVLX-NEXT:    vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
 ; NoVLX-NEXT:    movl %eax, %ecx
 ; NoVLX-NEXT:    shrl $16, %ecx
-; NoVLX-NEXT:    vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT:    vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
 ; NoVLX-NEXT:    movq %rax, %rcx
 ; NoVLX-NEXT:    shrq $32, %rcx
-; NoVLX-NEXT:    vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
 ; NoVLX-NEXT:    vmovq %xmm1, %rcx
 ; NoVLX-NEXT:    shrq $48, %rax
-; NoVLX-NEXT:    vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
 ; NoVLX-NEXT:    movl %ecx, %eax
 ; NoVLX-NEXT:    shrl $16, %eax
 ; NoVLX-NEXT:    vmovd %ecx, %xmm4
@@ -12462,25 +12427,24 @@ define zeroext i64 @test_masked_vpcmpsge
 ; NoVLX-NEXT:    vpinsrw $6, %ecx, %xmm1, %xmm1
 ; NoVLX-NEXT:    shrq $48, %rax
 ; NoVLX-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
 ; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    shrl $16, %edi
-; NoVLX-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
-; NoVLX-NEXT:    vmovdqa (%rsi), %ymm2
-; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT:    vpcmpgtw %ymm2, %ymm1, %ymm1
 ; NoVLX-NEXT:    vmovdqa 32(%rsi), %ymm2
-; NoVLX-NEXT:    vpcmpgtw %ymm1, %ymm2, %ymm1
-; NoVLX-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; NoVLX-NEXT:    vpxor %ymm2, %ymm1, %ymm1
-; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT:    vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
 ; NoVLX-NEXT:    kmovw %edi, %k2
-; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k1}
 ; NoVLX-NEXT:    kmovw %k0, %ecx
-; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
 ; NoVLX-NEXT:    kmovw %k0, %eax
 ; NoVLX-NEXT:    shll $16, %eax
 ; NoVLX-NEXT:    orl %ecx, %eax

Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll?rev=323572&r1=323571&r2=323572&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll Fri Jan 26 14:17:40 2018
@@ -321,15 +321,34 @@ define i16 @test_reduce_v8i16(<8 x i16>
 ; X64-SSE42-NEXT:    ## kill: def %ax killed %ax killed %eax
 ; X64-SSE42-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_reduce_v8i16:
-; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vmovd %xmm0, %eax
-; X64-AVX-NEXT:    ## kill: def %ax killed %ax killed %eax
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_reduce_v8i16:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vmovd %xmm0, %eax
+; X64-AVX1-NEXT:    ## kill: def %ax killed %ax killed %eax
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_reduce_v8i16:
+; X64-AVX2:       ## %bb.0:
+; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vmovd %xmm0, %eax
+; X64-AVX2-NEXT:    ## kill: def %ax killed %ax killed %eax
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: test_reduce_v8i16:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vmovd %xmm0, %eax
+; X64-AVX512-NEXT:    ## kill: def %ax killed %ax killed %eax
+; X64-AVX512-NEXT:    retq
   %1  = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
   %2  = icmp ugt <8 x i16> %a0, %1
   %3  = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
@@ -414,17 +433,40 @@ define i8 @test_reduce_v16i8(<16 x i8> %
 ; X64-SSE42-NEXT:    ## kill: def %al killed %al killed %eax
 ; X64-SSE42-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_reduce_v16i8:
-; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
-; X64-AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
-; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
-; X64-AVX-NEXT:    ## kill: def %al killed %al killed %eax
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_reduce_v16i8:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT:    ## kill: def %al killed %al killed %eax
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_reduce_v16i8:
+; X64-AVX2:       ## %bb.0:
+; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT:    ## kill: def %al killed %al killed %eax
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: test_reduce_v16i8:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT:    ## kill: def %al killed %al killed %eax
+; X64-AVX512-NEXT:    retq
   %1  = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2  = icmp ugt <16 x i8> %a0, %1
   %3  = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
@@ -976,10 +1018,9 @@ define i16 @test_reduce_v16i16(<16 x i16
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    ## kill: def %ax killed %ax killed %eax
 ; X64-AVX512-NEXT:    vzeroupper
@@ -1127,12 +1168,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
 ; X64-AVX512-NEXT:    ## kill: def %al killed %al killed %eax
 ; X64-AVX512-NEXT:    vzeroupper
@@ -1914,10 +1954,9 @@ define i16 @test_reduce_v32i16(<32 x i16
 ; X64-AVX512-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    ## kill: def %ax killed %ax killed %eax
 ; X64-AVX512-NEXT:    vzeroupper
@@ -2086,12 +2125,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %
 ; X64-AVX512-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
 ; X64-AVX512-NEXT:    ## kill: def %al killed %al killed %eax
 ; X64-AVX512-NEXT:    vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/vec_cmp_sint-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cmp_sint-128.ll?rev=323572&r1=323571&r2=323572&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cmp_sint-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cmp_sint-128.ll Fri Jan 26 14:17:40 2018
@@ -133,17 +133,32 @@ define <2 x i64> @ne_v2i64(<2 x i64> %a,
 ; SSE42-NEXT:    pxor %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX-LABEL: ne_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ne_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ne_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ne_v2i64:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomneqq %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ne_v2i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp ne <2 x i64> %a, %b
   %2 = sext <2 x i1> %1 to <2 x i64>
   ret <2 x i64> %2
@@ -157,17 +172,32 @@ define <4 x i32> @ne_v4i32(<4 x i32> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ne_v4i32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ne_v4i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ne_v4i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ne_v4i32:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomneqd %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ne_v4i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp ne <4 x i32> %a, %b
   %2 = sext <4 x i1> %1 to <4 x i32>
   ret <4 x i32> %2
@@ -181,17 +211,32 @@ define <8 x i16> @ne_v8i16(<8 x i16> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ne_v8i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ne_v8i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ne_v8i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ne_v8i16:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomneqw %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ne_v8i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp ne <8 x i16> %a, %b
   %2 = sext <8 x i1> %1 to <8 x i16>
   ret <8 x i16> %2
@@ -205,17 +250,32 @@ define <16 x i8> @ne_v16i8(<16 x i8> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ne_v16i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ne_v16i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ne_v16i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ne_v16i8:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomneqb %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ne_v16i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp ne <16 x i8> %a, %b
   %2 = sext <16 x i1> %1 to <16 x i8>
   ret <16 x i8> %2
@@ -267,17 +327,32 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a,
 ; SSE42-NEXT:    pxor %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX-LABEL: ge_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ge_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ge_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ge_v2i64:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomgeq %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ge_v2i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp sge <2 x i64> %a, %b
   %2 = sext <2 x i1> %1 to <2 x i64>
   ret <2 x i64> %2
@@ -291,17 +366,32 @@ define <4 x i32> @ge_v4i32(<4 x i32> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ge_v4i32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ge_v4i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ge_v4i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ge_v4i32:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomged %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ge_v4i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp sge <4 x i32> %a, %b
   %2 = sext <4 x i1> %1 to <4 x i32>
   ret <4 x i32> %2
@@ -315,17 +405,32 @@ define <8 x i16> @ge_v8i16(<8 x i16> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ge_v8i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ge_v8i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ge_v8i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ge_v8i16:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomgew %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ge_v8i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp sge <8 x i16> %a, %b
   %2 = sext <8 x i1> %1 to <8 x i16>
   ret <8 x i16> %2
@@ -339,17 +444,32 @@ define <16 x i8> @ge_v16i8(<16 x i8> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ge_v16i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ge_v16i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ge_v16i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ge_v16i8:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomgeb %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ge_v16i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp sge <16 x i8> %a, %b
   %2 = sext <16 x i1> %1 to <16 x i8>
   ret <16 x i8> %2
@@ -515,17 +635,32 @@ define <2 x i64> @le_v2i64(<2 x i64> %a,
 ; SSE42-NEXT:    pxor %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX-LABEL: le_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: le_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: le_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: le_v2i64:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomleq %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: le_v2i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp sle <2 x i64> %a, %b
   %2 = sext <2 x i1> %1 to <2 x i64>
   ret <2 x i64> %2
@@ -539,17 +674,32 @@ define <4 x i32> @le_v4i32(<4 x i32> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: le_v4i32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: le_v4i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: le_v4i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: le_v4i32:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomled %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: le_v4i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp sle <4 x i32> %a, %b
   %2 = sext <4 x i1> %1 to <4 x i32>
   ret <4 x i32> %2
@@ -563,17 +713,32 @@ define <8 x i16> @le_v8i16(<8 x i16> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: le_v8i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: le_v8i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: le_v8i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: le_v8i16:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomlew %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: le_v8i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp sle <8 x i16> %a, %b
   %2 = sext <8 x i1> %1 to <8 x i16>
   ret <8 x i16> %2
@@ -587,17 +752,32 @@ define <16 x i8> @le_v16i8(<16 x i8> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: le_v16i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: le_v16i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: le_v16i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: le_v16i8:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomleb %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: le_v16i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp sle <16 x i8> %a, %b
   %2 = sext <16 x i1> %1 to <16 x i8>
   ret <16 x i8> %2

Modified: llvm/trunk/test/CodeGen/X86/vec_cmp_uint-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cmp_uint-128.ll?rev=323572&r1=323571&r2=323572&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cmp_uint-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cmp_uint-128.ll Fri Jan 26 14:17:40 2018
@@ -133,17 +133,32 @@ define <2 x i64> @ne_v2i64(<2 x i64> %a,
 ; SSE42-NEXT:    pxor %xmm1, %xmm0
 ; SSE42-NEXT:    retq
 ;
-; AVX-LABEL: ne_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ne_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ne_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ne_v2i64:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomneqq %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ne_v2i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp ne <2 x i64> %a, %b
   %2 = sext <2 x i1> %1 to <2 x i64>
   ret <2 x i64> %2
@@ -157,17 +172,32 @@ define <4 x i32> @ne_v4i32(<4 x i32> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ne_v4i32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ne_v4i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ne_v4i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ne_v4i32:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomneqd %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ne_v4i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp ne <4 x i32> %a, %b
   %2 = sext <4 x i1> %1 to <4 x i32>
   ret <4 x i32> %2
@@ -181,17 +211,32 @@ define <8 x i16> @ne_v8i16(<8 x i16> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ne_v8i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ne_v8i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ne_v8i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ne_v8i16:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomneqw %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ne_v8i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp ne <8 x i16> %a, %b
   %2 = sext <8 x i1> %1 to <8 x i16>
   ret <8 x i16> %2
@@ -205,17 +250,32 @@ define <16 x i8> @ne_v16i8(<16 x i8> %a,
 ; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: ne_v16i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: ne_v16i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ne_v16i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: ne_v16i8:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpcomneqb %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
+;
+; AVX512-LABEL: ne_v16i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp ne <16 x i8> %a, %b
   %2 = sext <16 x i1> %1 to <16 x i8>
   ret <16 x i8> %2

Modified: llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll?rev=323572&r1=323571&r2=323572&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll Fri Jan 26 14:17:40 2018
@@ -33,8 +33,7 @@ define <8 x i64> @testv8i64(<8 x i64> %i
 ; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT:    vpxorq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
@@ -57,8 +56,7 @@ define <8 x i64> @testv8i64(<8 x i64> %i
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrlq $32, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512DQ-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
@@ -112,8 +110,7 @@ define <8 x i64> @testv8i64u(<8 x i64> %
 ; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT:    vpxorq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
@@ -136,8 +133,7 @@ define <8 x i64> @testv8i64u(<8 x i64> %
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrlq $32, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512DQ-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
@@ -189,8 +185,7 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT:    vpxorq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
@@ -215,8 +210,7 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $16, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512DQ-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
@@ -276,8 +270,7 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT:    vpxorq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
@@ -302,8 +295,7 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $16, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512DQ-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3




More information about the llvm-commits mailing list