[llvm] r322432 - [X86] Add DAG combine to promote vXi1 result of a vXi8/vXi16 setcc when we have AVX512 but not BWI.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 12 22:24:46 PST 2018
Author: ctopper
Date: Fri Jan 12 22:24:46 2018
New Revision: 322432
URL: http://llvm.org/viewvc/llvm-project?rev=322432&view=rev
Log:
[X86] Add DAG combine to promote vXi1 result of a vXi8/vXi16 setcc when we have AVX512 but not BWI.
This avoids having the result type stick around until lowering where we have to extend the setcc and insert a truncate. If we get the types converted early we can do more to optimize it.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=322432&r1=322431&r2=322432&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jan 12 22:24:46 2018
@@ -36377,10 +36377,10 @@ static SDValue combineSetCC(SDNode *N, S
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
+ EVT OpVT = LHS.getValueType();
SDLoc DL(N);
if (CC == ISD::SETNE || CC == ISD::SETEQ) {
- EVT OpVT = LHS.getValueType();
// 0-x == y --> x+y == 0
// 0-x != y --> x+y != 0
if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
@@ -36429,6 +36429,20 @@ static SDValue combineSetCC(SDNode *N, S
}
}
+ // If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
+ // pre-promote its result type since vXi1 vectors don't get promoted
+ // during type legalization.
+ // NOTE: The element count check is to ignore operand types that need to
+ // go through type promotion to a 128-bit vector.
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() > 4 &&
+ (OpVT.getVectorElementType() == MVT::i8 ||
+ OpVT.getVectorElementType() == MVT::i16)) {
+ SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
+ N->getOperand(2));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
+ }
+
// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
// to avoid scalarization via legalization because v4i32 is not a legal type.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=322432&r1=322431&r2=322432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Fri Jan 12 22:24:46 2018
@@ -18,9 +18,9 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -54,9 +54,9 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -181,11 +181,11 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -222,11 +222,11 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -1237,9 +1237,9 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -1276,9 +1276,9 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -1408,11 +1408,11 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -1452,11 +1452,11 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -8242,9 +8242,9 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -8278,9 +8278,9 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -8405,11 +8405,11 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -8446,11 +8446,11 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -9461,9 +9461,9 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -9500,9 +9500,9 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -9632,11 +9632,11 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -9676,11 +9676,11 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -16466,11 +16466,11 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -16506,11 +16506,11 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -16643,13 +16643,13 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -16688,13 +16688,13 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -17769,11 +17769,11 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -17812,11 +17812,11 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -17954,13 +17954,13 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -18002,13 +18002,13 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -24822,12 +24822,12 @@ define zeroext i32 @test_vpcmpultb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -24861,12 +24861,12 @@ define zeroext i32 @test_vpcmpultb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -24997,14 +24997,14 @@ define zeroext i64 @test_vpcmpultb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -25041,14 +25041,14 @@ define zeroext i64 @test_vpcmpultb_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -26113,12 +26113,12 @@ define zeroext i32 @test_vpcmpultw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -26155,12 +26155,12 @@ define zeroext i32 @test_vpcmpultw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -26296,14 +26296,14 @@ define zeroext i64 @test_vpcmpultw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
@@ -26343,14 +26343,14 @@ define zeroext i64 @test_vpcmpultw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=322432&r1=322431&r2=322432&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Fri Jan 12 22:24:46 2018
@@ -3771,63 +3771,35 @@ define <64 x i1> @test_cmp_v64i16(<64 x
;
; AVX512F-LABEL: test_cmp_v64i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
-; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3
-; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
-; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
+; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
+; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm6, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 killed %ymm2
+; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
+; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3
+; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: test_cmp_v64i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3
-; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3
-; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpsllw $7, %ymm2, %ymm2
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
+; AVX512DQ-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm6, %ymm0
-; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512DQ-NEXT: # kill: def %xmm2 killed %xmm2 killed %ymm2
+; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm2
+; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3
+; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
More information about the llvm-commits
mailing list