[llvm] r322101 - [X86] Add a DAG combine to combine (sext (setcc)) with VLX
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 9 10:14:22 PST 2018
Author: ctopper
Date: Tue Jan 9 10:14:22 2018
New Revision: 322101
URL: http://llvm.org/viewvc/llvm-project?rev=322101&view=rev
Log:
[X86] Add a DAG combine to combine (sext (setcc)) with VLX
Normally the target-independent DAG combine would do this based on getSetCCResultType, but with VLX getSetCCResultType returns a vXi1 type, preventing that combine from kicking in.
Doing this combine anyway allows us to remove the explicit sign extend that would otherwise be emitted.
This patch adds a target-specific DAG combine to fold the sext+setcc when the result type is the same size as the input to the setcc. I've restricted this to FP compares and integer compares that can be represented with PCMPEQ and PCMPGT, since we don't have full integer compare support on the older ISAs.
Differential Revision: https://reviews.llvm.org/D41850
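For reference, a minimal LLVM IR sketch of the pattern this combine targets (the function name is illustrative, not taken from the patch): the sext result is the same width as the setcc inputs, so with VLX the compare can now produce the ymm result directly instead of writing a k-register and expanding it with vpmovm2d.

  define <8 x i32> @cmp_oeq_sext(<8 x float> %a, <8 x float> %b) {
    ; fcmp produces <8 x i1>; sext widens it to the same 256-bit width as the inputs
    %c = fcmp oeq <8 x float> %a, %b
    %s = sext <8 x i1> %c to <8 x i32>
    ret <8 x i32> %s
  }

With this patch the SKX/VLX lowering becomes a single vcmpeqps into a ymm register, as the avx-schedule.ll and commute-fcmp.ll diffs below show.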
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx-schedule.ll
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll
llvm/trunk/test/CodeGen/X86/commute-fcmp.ll
llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
llvm/trunk/test/CodeGen/X86/vselect-packss.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 9 10:14:22 2018
@@ -35996,6 +35996,45 @@ static SDValue combineToExtendVectorInRe
return SDValue();
}
+// Attempt to combine a (sext/zext (setcc)) to a setcc with an xmm/ymm/zmm
+// result type.
+static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // Only do this combine with AVX512 for vector extends.
+ if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Only combine legal element types.
+ EVT SVT = VT.getVectorElementType();
+ if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
+ SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
+ return SDValue();
+
+ // We can only do this if the vector size is 256 bits or less.
+ unsigned Size = VT.getSizeInBits();
+ if (Size > 256)
+ return SDValue();
+
+ // Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since
+ // those are the only integer compares we have.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0->getOperand(2))->get();
+ if (ISD::isUnsignedIntSetCC(CC) || CC == ISD::SETLE || CC == ISD::SETGE ||
+ CC == ISD::SETNE)
+ return SDValue();
+
+ // Only do this combine if the extension will be fully consumed by the setcc.
+ EVT N00VT = N0.getOperand(0).getValueType();
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (Size != MatchingVecType.getSizeInBits())
+ return SDValue();
+
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+}
+
static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -36013,6 +36052,9 @@ static SDValue combineSext(SDNode *N, Se
if (!DCI.isBeforeLegalizeOps())
return SDValue();
+ if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
+ return V;
+
if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
// Invert and sign-extend a boolean is the same as zero-extend and subtract
Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Tue Jan 9 10:14:22 2018
@@ -947,11 +947,9 @@ define <4 x double> @test_cmppd(<4 x dou
;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %k1 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpmovm2q %k1, %ymm1 # sched: [1:0.25]
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
+; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cmppd:
@@ -1015,11 +1013,9 @@ define <8 x float> @test_cmpps(<8 x floa
;
; SKX-LABEL: test_cmpps:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %k1 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpmovm2d %k1, %ymm1 # sched: [1:0.25]
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
+; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cmpps:
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Tue Jan 9 10:14:22 2018
@@ -2159,10 +2159,8 @@ define <32 x i8> @test_pcmpeqb(<32 x i8>
;
; SKX-LABEL: test_pcmpeqb:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqb:
@@ -2205,10 +2203,8 @@ define <8 x i32> @test_pcmpeqd(<8 x i32>
;
; SKX-LABEL: test_pcmpeqd:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqd:
@@ -2251,10 +2247,8 @@ define <4 x i64> @test_pcmpeqq(<4 x i64>
;
; SKX-LABEL: test_pcmpeqq:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqq:
@@ -2297,10 +2291,8 @@ define <16 x i16> @test_pcmpeqw(<16 x i1
;
; SKX-LABEL: test_pcmpeqw:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqw:
@@ -2343,10 +2335,8 @@ define <32 x i8> @test_pcmpgtb(<32 x i8>
;
; SKX-LABEL: test_pcmpgtb:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtb:
@@ -2389,10 +2379,8 @@ define <8 x i32> @test_pcmpgtd(<8 x i32>
;
; SKX-LABEL: test_pcmpgtd:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtd:
@@ -2435,10 +2423,8 @@ define <4 x i64> @test_pcmpgtq(<4 x i64>
;
; SKX-LABEL: test_pcmpgtq:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtq:
@@ -2481,10 +2467,8 @@ define <16 x i16> @test_pcmpgtw(<16 x i1
;
; SKX-LABEL: test_pcmpgtw:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtw:
Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Tue Jan 9 10:14:22 2018
@@ -848,22 +848,13 @@ define <8 x double> @test43(<8 x double>
}
define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
-; KNL-LABEL: test44:
-; KNL: ## %bb.0:
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
-; KNL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test44:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
-; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; SKX-NEXT: vpmovm2d %k0, %xmm0
-; SKX-NEXT: retq
+; CHECK-LABEL: test44:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%mask = icmp eq <4 x i16> %x, %y
%1 = sext <4 x i1> %mask to <4 x i32>
ret <4 x i32> %1
Modified: llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll Tue Jan 9 10:14:22 2018
@@ -9,14 +9,13 @@ define <3 x i8 > @foo(<3 x i8>%x, <3 x i
; CHECK-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; CHECK-NEXT: vpslld $24, %xmm0, %xmm0
-; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm1
; CHECK-NEXT: vpinsrd $1, %r8d, %xmm1, %xmm1
+; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, %r9d, %xmm1, %xmm1
; CHECK-NEXT: vpslld $24, %xmm1, %xmm1
; CHECK-NEXT: vpsrad $24, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
-; CHECK-NEXT: vpmovm2d %k0, %xmm0
+; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpextrb $0, %xmm0, %eax
; CHECK-NEXT: vpextrb $4, %xmm0, %edx
; CHECK-NEXT: vpextrb $8, %xmm0, %ecx
Modified: llvm/trunk/test/CodeGen/X86/commute-fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commute-fcmp.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commute-fcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commute-fcmp.ll Tue Jan 9 10:14:22 2018
@@ -21,9 +21,7 @@ define <4 x i32> @commute_cmpps_eq(<4 x
;
; AVX512-LABEL: commute_cmpps_eq:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp oeq <4 x float> %1, %a1
@@ -44,9 +42,7 @@ define <4 x i32> @commute_cmpps_ne(<4 x
;
; AVX512-LABEL: commute_cmpps_ne:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp une <4 x float> %1, %a1
@@ -67,9 +63,7 @@ define <4 x i32> @commute_cmpps_ord(<4 x
;
; AVX512-LABEL: commute_cmpps_ord:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ord <4 x float> %1, %a1
@@ -90,9 +84,7 @@ define <4 x i32> @commute_cmpps_uno(<4 x
;
; AVX512-LABEL: commute_cmpps_uno:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp uno <4 x float> %1, %a1
@@ -117,9 +109,7 @@ define <4 x i32> @commute_cmpps_ueq(<4 x
;
; AVX512-LABEL: commute_cmpps_ueq:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeq_uqps (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpeq_uqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ueq <4 x float> %1, %a1
@@ -144,9 +134,7 @@ define <4 x i32> @commute_cmpps_one(<4 x
;
; AVX512-LABEL: commute_cmpps_one:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneq_oqps (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpneq_oqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp one <4 x float> %1, %a1
@@ -171,9 +159,7 @@ define <4 x i32> @commute_cmpps_lt(<4 x
; AVX512-LABEL: commute_cmpps_lt:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
-; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp olt <4 x float> %1, %a1
@@ -198,9 +184,7 @@ define <4 x i32> @commute_cmpps_le(<4 x
; AVX512-LABEL: commute_cmpps_le:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
-; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ole <4 x float> %1, %a1
@@ -222,9 +206,7 @@ define <8 x i32> @commute_cmpps_eq_ymm(<
;
; AVX512-LABEL: commute_cmpps_eq_ymm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp oeq <8 x float> %1, %a1
@@ -246,9 +228,7 @@ define <8 x i32> @commute_cmpps_ne_ymm(<
;
; AVX512-LABEL: commute_cmpps_ne_ymm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp une <8 x float> %1, %a1
@@ -270,9 +250,7 @@ define <8 x i32> @commute_cmpps_ord_ymm(
;
; AVX512-LABEL: commute_cmpps_ord_ymm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ord <8 x float> %1, %a1
@@ -294,9 +272,7 @@ define <8 x i32> @commute_cmpps_uno_ymm(
;
; AVX512-LABEL: commute_cmpps_uno_ymm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp uno <8 x float> %1, %a1
@@ -326,9 +302,7 @@ define <8 x i32> @commute_cmpps_ueq_ymm(
;
; AVX512-LABEL: commute_cmpps_ueq_ymm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeq_uqps (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ueq <8 x float> %1, %a1
@@ -358,9 +332,7 @@ define <8 x i32> @commute_cmpps_one_ymm(
;
; AVX512-LABEL: commute_cmpps_one_ymm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneq_oqps (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp one <8 x float> %1, %a1
@@ -388,9 +360,7 @@ define <8 x i32> @commute_cmpps_lt_ymm(<
; AVX512-LABEL: commute_cmpps_lt_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
-; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp olt <8 x float> %1, %a1
@@ -418,9 +388,7 @@ define <8 x i32> @commute_cmpps_le_ymm(<
; AVX512-LABEL: commute_cmpps_le_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
-; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ole <8 x float> %1, %a1
@@ -446,9 +414,7 @@ define <2 x i64> @commute_cmppd_eq(<2 x
;
; AVX512-LABEL: commute_cmppd_eq:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp oeq <2 x double> %1, %a1
@@ -469,9 +435,7 @@ define <2 x i64> @commute_cmppd_ne(<2 x
;
; AVX512-LABEL: commute_cmppd_ne:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp une <2 x double> %1, %a1
@@ -492,9 +456,7 @@ define <2 x i64> @commute_cmppd_ord(<2 x
;
; AVX512-LABEL: commute_cmppd_ord:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ord <2 x double> %1, %a1
@@ -519,9 +481,7 @@ define <2 x i64> @commute_cmppd_ueq(<2 x
;
; AVX512-LABEL: commute_cmppd_ueq:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ueq <2 x double> %1, %a1
@@ -546,9 +506,7 @@ define <2 x i64> @commute_cmppd_one(<2 x
;
; AVX512-LABEL: commute_cmppd_one:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp one <2 x double> %1, %a1
@@ -569,9 +527,7 @@ define <2 x i64> @commute_cmppd_uno(<2 x
;
; AVX512-LABEL: commute_cmppd_uno:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp uno <2 x double> %1, %a1
@@ -596,9 +552,7 @@ define <2 x i64> @commute_cmppd_lt(<2 x
; AVX512-LABEL: commute_cmppd_lt:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
-; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp olt <2 x double> %1, %a1
@@ -623,9 +577,7 @@ define <2 x i64> @commute_cmppd_le(<2 x
; AVX512-LABEL: commute_cmppd_le:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
-; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ole <2 x double> %1, %a1
@@ -647,9 +599,7 @@ define <4 x i64> @commute_cmppd_eq_ymmm(
;
; AVX512-LABEL: commute_cmppd_eq_ymmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp oeq <4 x double> %1, %a1
@@ -671,9 +621,7 @@ define <4 x i64> @commute_cmppd_ne_ymmm(
;
; AVX512-LABEL: commute_cmppd_ne_ymmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp une <4 x double> %1, %a1
@@ -695,9 +643,7 @@ define <4 x i64> @commute_cmppd_ord_ymmm
;
; AVX512-LABEL: commute_cmppd_ord_ymmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ord <4 x double> %1, %a1
@@ -719,9 +665,7 @@ define <4 x i64> @commute_cmppd_uno_ymmm
;
; AVX512-LABEL: commute_cmppd_uno_ymmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp uno <4 x double> %1, %a1
@@ -751,9 +695,7 @@ define <4 x i64> @commute_cmppd_ueq_ymmm
;
; AVX512-LABEL: commute_cmppd_ueq_ymmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ueq <4 x double> %1, %a1
@@ -783,9 +725,7 @@ define <4 x i64> @commute_cmppd_one_ymmm
;
; AVX512-LABEL: commute_cmppd_one_ymmm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp one <4 x double> %1, %a1
@@ -813,9 +753,7 @@ define <4 x i64> @commute_cmppd_lt_ymmm(
; AVX512-LABEL: commute_cmppd_lt_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
-; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp olt <4 x double> %1, %a1
@@ -843,9 +781,7 @@ define <4 x i64> @commute_cmppd_le_ymmm(
; AVX512-LABEL: commute_cmppd_le_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
-; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ole <4 x double> %1, %a1
Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Tue Jan 9 10:14:22 2018
@@ -860,10 +860,8 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
;
; SKX-LABEL: test_pcmpeqq:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 # sched: [9:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_pcmpeqq:
Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Tue Jan 9 10:14:22 2018
@@ -771,10 +771,8 @@ define <2 x i64> @test_pcmpgtq(<2 x i64>
;
; SKX-LABEL: test_pcmpgtq:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 # sched: [9:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_pcmpgtq:
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll Tue Jan 9 10:14:22 2018
@@ -23,10 +23,8 @@ define i64 @test_v2f64_sext(<2 x double>
;
; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
@@ -62,10 +60,8 @@ define i64 @test_v4f64_sext(<4 x double>
;
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
@@ -157,10 +153,8 @@ define i32 @test_v4f32_sext(<4 x float>
;
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -202,10 +196,8 @@ define i32 @test_v8f32_sext(<8 x float>
;
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
@@ -298,9 +290,7 @@ define i64 @test_v2i64_sext(<2 x i64> %a
;
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
@@ -352,9 +342,7 @@ define i64 @test_v4i64_sext(<4 x i64> %a
;
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -463,9 +451,7 @@ define i32 @test_v4i32_sext(<4 x i32> %a
;
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -523,9 +509,7 @@ define i32 @test_v8i32_sext(<8 x i32> %a
;
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -640,8 +624,7 @@ define i16 @test_v8i16_sext(<8 x i16> %a
;
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2w %k0, %xmm0
+; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -709,8 +692,7 @@ define i16 @test_v16i16_sext(<16 x i16>
;
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
-; AVX512-NEXT: vpmovm2w %k0, %ymm0
+; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -838,8 +820,7 @@ define i8 @test_v16i8_sext(<16 x i8> %a0
;
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2b %k0, %xmm0
+; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -913,8 +894,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
;
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
-; AVX512-NEXT: vpmovm2b %k0, %ymm0
+; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll Tue Jan 9 10:14:22 2018
@@ -23,10 +23,8 @@ define i64 @test_v2f64_sext(<2 x double>
;
; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
@@ -60,10 +58,8 @@ define i64 @test_v4f64_sext(<4 x double>
;
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -147,10 +143,8 @@ define i32 @test_v4f32_sext(<4 x float>
;
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -188,10 +182,8 @@ define i32 @test_v8f32_sext(<8 x float>
;
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -280,9 +272,7 @@ define i64 @test_v2i64_sext(<2 x i64> %a
;
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
@@ -330,9 +320,7 @@ define i64 @test_v4i64_sext(<4 x i64> %a
;
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -431,9 +419,7 @@ define i32 @test_v4i32_sext(<4 x i32> %a
;
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -485,9 +471,7 @@ define i32 @test_v8i32_sext(<8 x i32> %a
;
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -592,8 +576,7 @@ define i16 @test_v8i16_sext(<8 x i16> %a
;
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2w %k0, %xmm0
+; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -658,8 +641,7 @@ define i16 @test_v16i16_sext(<16 x i16>
;
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
-; AVX512-NEXT: vpmovm2w %k0, %ymm0
+; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -777,8 +759,7 @@ define i8 @test_v16i8_sext(<16 x i8> %a0
;
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2b %k0, %xmm0
+; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -849,8 +830,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
;
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
-; AVX512-NEXT: vpmovm2b %k0, %ymm0
+; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Modified: llvm/trunk/test/CodeGen/X86/vselect-packss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-packss.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-packss.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-packss.ll Tue Jan 9 10:14:22 2018
@@ -377,33 +377,14 @@ define <16 x i8> @vselect_packss(<16 x i
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512NOBW-LABEL: vselect_packss:
-; AVX512NOBW: # %bb.0:
-; AVX512NOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX512NOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512NOBW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512NOBW-NEXT: vzeroupper
-; AVX512NOBW-NEXT: retq
-;
-; AVX512BWNOVL-LABEL: vselect_packss:
-; AVX512BWNOVL: # %bb.0:
-; AVX512BWNOVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX512BWNOVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWNOVL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512BWNOVL-NEXT: vzeroupper
-; AVX512BWNOVL-NEXT: retq
-;
-; AVX512BWVL-LABEL: vselect_packss:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
-; AVX512BWVL-NEXT: vpmovm2w %k0, %ymm0
-; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512BWVL-NEXT: vzeroupper
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: vselect_packss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = icmp eq <16 x i16> %a0, %a1
%2 = sext <16 x i1> %1 to <16 x i16>
%3 = shufflevector <16 x i16> %2, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>