[llvm] r358358 - [X86] Don't form masked vpcmp/vcmp/vptestm operations if the setcc node has more than one use.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 14 11:26:06 PDT 2019
Author: ctopper
Date: Sun Apr 14 11:26:06 2019
New Revision: 358358
URL: http://llvm.org/viewvc/llvm-project?rev=358358&view=rev
Log:
[X86] Don't form masked vpcmp/vcmp/vptestm operations if the setcc node has more than one use.
We're better off emitting a single compare + kand rather than a compare for the
other use and a masked compare.
I'm looking into using custom instruction selection for VPTESTM to reduce the
ridiculous number of permutations of patterns in the isel table. Putting a one
use check on all masked compare folding makes load fold matching in the custom
code easier.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/vec_uaddo.ll
llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=358358&r1=358357&r2=358358&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Apr 14 11:26:06 2019
@@ -388,11 +388,11 @@ multiclass AVX512_maskable_common_cmp<bi
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, bit IsCommutable = 0> :
+ dag RHS, dag RHS_su, bit IsCommutable = 0> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (and _.KRCWM:$mask, RHS), IsCommutable>;
+ (and _.KRCWM:$mask, RHS_su), IsCommutable>;
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
@@ -2020,15 +2020,16 @@ defm VPBLENDMW : blendmask_bw<0x66, "vpb
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
+ PatFrag OpNode_su, PatFrag OpNodeSAE_su,
X86FoldableSchedWrite sched> {
defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2036,6 +2037,8 @@ multiclass avx512_cmp_scalar<X86VectorVT
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
+ imm:$cc),
+ (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -2044,9 +2047,10 @@ multiclass avx512_cmp_scalar<X86VectorVT
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
- (OpNodeSAE (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc)>,
+ (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc),
+ (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc)>,
EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
let isCodeGenOnly = 1 in {
@@ -2072,18 +2076,29 @@ multiclass avx512_cmp_scalar<X86VectorVT
}
}
+def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpms node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
+ X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XSIi8Base;
let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
+ X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _,
- bit IsCommutable> {
+ PatFrag OpNode_su, X86FoldableSchedWrite sched,
+ X86VectorVTInfo _, bit IsCommutable> {
let isCommutable = IsCommutable in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
@@ -2102,22 +2117,23 @@ multiclass avx512_icmp_packed<bits<8> op
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
+ (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
EVEX_4V, EVEX_K, Sched<[sched]>;
def rmk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
+ (OpNode_su (_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)))))]>,
EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+ PatFrag OpNode_su,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
bit IsCommutable> :
- avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
+ avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
@@ -2132,7 +2148,7 @@ multiclass avx512_icmp_packed_rmb<bits<8
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
+ (OpNode_su (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))]>,
EVEX_4V, EVEX_K, EVEX_B,
@@ -2140,33 +2156,34 @@ multiclass avx512_icmp_packed_rmb<bits<8
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- X86SchedWriteWidths sched,
+ PatFrag OpNode_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
+ defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
+ defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
+ defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
- PatFrag OpNode, X86SchedWriteWidths sched,
+ PatFrag OpNode, PatFrag OpNode_su,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
+ defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
+ defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
+ defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
@@ -2179,45 +2196,55 @@ def X86pcmpeqm_c : PatFrag<(ops node:$sr
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(setcc node:$src1, node:$src2, SETGT)>;
+def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86pcmpeqm_c node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86pcmpgtm node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
-defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
+defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
+defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
+defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
+defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
+defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
+defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
+defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
+defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
- PatFrag CommFrag, X86FoldableSchedWrite sched,
+ PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
+ X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name> {
let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
@@ -2246,9 +2273,9 @@ multiclass avx512_icmp_cc<bits<8> opc, s
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (_.KVT (Frag:$cc (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- cond))))]>,
+ (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched]>;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
@@ -2258,7 +2285,7 @@ multiclass avx512_icmp_cc<bits<8> opc, s
"$dst {${mask}}, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT
- (Frag:$cc
+ (Frag_su:$cc
(_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)),
cond))))]>,
@@ -2270,7 +2297,7 @@ multiclass avx512_icmp_cc<bits<8> opc, s
_.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
def : Pat<(and _.KRCWM:$mask,
- (_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
+ (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmik")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
@@ -2278,9 +2305,11 @@ multiclass avx512_icmp_cc<bits<8> opc, s
}
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
- PatFrag CommFrag, X86FoldableSchedWrite sched,
+ PatFrag Frag_su, PatFrag CommFrag,
+ PatFrag CommFrag_su, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name> :
- avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
+ avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched, _, Name> {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
u8imm:$cc),
@@ -2300,7 +2329,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (_.KVT (Frag:$cc
+ (_.KVT (Frag_su:$cc
(_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
@@ -2313,7 +2342,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
_.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
def : Pat<(and _.KRCWM:$mask,
- (_.KVT (CommFrag:$cc (X86VBroadcast
+ (_.KVT (CommFrag_su:$cc (X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmibk")
@@ -2322,32 +2351,34 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
- PatFrag CommFrag, X86SchedWriteWidths sched,
+ PatFrag Frag_su, PatFrag CommFrag,
+ PatFrag CommFrag_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
- VTInfo.info512, NAME>, EVEX_V512;
+ defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
- VTInfo.info256, NAME>, EVEX_V256;
- defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
- VTInfo.info128, NAME>, EVEX_V128;
+ defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
+ defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
}
}
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
- PatFrag CommFrag, X86SchedWriteWidths sched,
+ PatFrag Frag_su, PatFrag CommFrag,
+ PatFrag CommFrag_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
- VTInfo.info512, NAME>, EVEX_V512;
+ defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
- VTInfo.info256, NAME>, EVEX_V256;
- defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
- VTInfo.info128, NAME>, EVEX_V128;
+ defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
+ defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
}
}
@@ -2371,6 +2402,12 @@ def X86pcmpm : PatFrag<(ops node:$src1,
return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
+def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
@@ -2378,12 +2415,24 @@ def X86pcmpm_commute : PatFrag<(ops node
return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
+def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm_commute>;
+
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
+def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
@@ -2391,53 +2440,76 @@ def X86pcmpum_commute : PatFrag<(ops nod
return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
+def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm_commute>;
+
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
-defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
+ X86pcmpm_commute, X86pcmpm_commute_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
-defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
+ X86pcmpum_commute, X86pcmpum_commute_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
-defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
+defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
+ X86pcmpm_commute, X86pcmpm_commute_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
+ X86pcmpum_commute, X86pcmpum_commute_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
+defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
+ X86pcmpm_commute, X86pcmpm_commute_su,
SchedWriteVecALU, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
-defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
+ X86pcmpum_commute, X86pcmpum_commute_su,
SchedWriteVecALU, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
-defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
+defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
+ X86pcmpm_commute, X86pcmpm_commute_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
+ X86pcmpum_commute, X86pcmpum_commute_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpm node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (X86cmpm (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc), 1>,
- Sched<[sched]>;
+ (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ 1>, Sched<[sched]>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (X86cmpm (_.VT _.RC:$src1),
- (_.VT (_.LdFrag addr:$src2)),
- imm:$cc)>,
+ (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ imm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ imm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
@@ -2448,7 +2520,10 @@ multiclass avx512_vcmp_common<X86Foldabl
"$src1, ${src2}"#_.BroadcastStr#", $cc",
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- imm:$cc)>,
+ imm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ imm:$cc)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Patterns for selecting with loads in other operand.
@@ -2457,9 +2532,9 @@ multiclass avx512_vcmp_common<X86Foldabl
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
imm:$cc)>;
- def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
- (_.VT _.RC:$src1),
- CommutableCMPCC:$cc)),
+ def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
+ (_.VT _.RC:$src1),
+ CommutableCMPCC:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
imm:$cc)>;
@@ -2469,10 +2544,10 @@ multiclass avx512_vcmp_common<X86Foldabl
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
imm:$cc)>;
- def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
- (_.VT _.RC:$src1),
- CommutableCMPCC:$cc)),
+ def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ (_.VT _.RC:$src1),
+ CommutableCMPCC:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
imm:$cc)>;
@@ -2485,8 +2560,8 @@ multiclass avx512_vcmp_sae<X86FoldableSc
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc",
- (X86cmpmSAE (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
+ (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc)>,
EVEX_B, Sched<[sched]>;
}
@@ -5739,6 +5814,7 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+ PatFrag OpNode_su,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
let ExeDomain = _.ExeDomain in {
@@ -5746,12 +5822,15 @@ multiclass avx512_vptest<bits<8> opc, st
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)>,
+ (OpNode (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV),
+ (OpNode_su (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)>,
EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (and _.RC:$src1, (_.LdFrag addr:$src2)),
+ _.ImmAllZerosV),
+ (OpNode_su (and _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -5762,13 +5841,14 @@ multiclass avx512_vptest<bits<8> opc, st
(_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
_.RC:$src, _.RC:$src))>;
- def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
+ def : Pat<(_.KVT (and _.KRC:$mask, (OpNode_su _.RC:$src, _.ImmAllZerosV))),
(_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
_.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _> {
+ PatFrag OpNode_su, X86FoldableSchedWrite sched,
+ X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
@@ -5777,14 +5857,19 @@ multiclass avx512_vptest_mb<bits<8> opc,
(OpNode (and _.RC:$src1,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))),
- _.ImmAllZerosV)>,
+ _.ImmAllZerosV),
+ (OpNode_su (and _.RC:$src1,
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))),
+ _.ImmAllZerosV)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
-multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
- X86VectorVTInfo _, string Name> {
+multiclass avx512_vptest_lowering<PatFrag OpNode, PatFrag OpNode_su,
+ X86VectorVTInfo ExtendInfo, X86VectorVTInfo _,
+ string Name> {
def : Pat<(_.KVT (OpNode (and _.RC:$src1, _.RC:$src2),
_.ImmAllZerosV)),
(_.KVT (COPY_TO_REGCLASS
@@ -5796,8 +5881,8 @@ multiclass avx512_vptest_lowering<PatFra
_.KRC))>;
def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (and _.RC:$src1, _.RC:$src2),
- _.ImmAllZerosV))),
+ (OpNode_su (and _.RC:$src1, _.RC:$src2),
+ _.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(Name # "Zrrk")
(COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
@@ -5816,7 +5901,7 @@ multiclass avx512_vptest_lowering<PatFra
_.RC:$src, _.SubRegIdx)),
_.KRC))>;
- def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
+ def : Pat<(_.KVT (and _.KRC:$mask, (OpNode_su _.RC:$src, _.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(Name # "Zrrk")
(COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
@@ -5828,56 +5913,58 @@ multiclass avx512_vptest_lowering<PatFra
}
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
+ PatFrag OpNode_su, X86SchedWriteWidths sched,
+ AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
- defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
+ defm Z : avx512_vptest<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM, _.info512, NAME>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
- defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
+ defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM, _.info256, NAME>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM, _.info256>, EVEX_V256;
+ defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM, _.info128, NAME>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM, _.info128>, EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
- defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
+ defm Z256_Alt : avx512_vptest_lowering< OpNode, OpNode_su, _.info512, _.info256, NAME>;
+ defm Z128_Alt : avx512_vptest_lowering< OpNode, OpNode_su, _.info512, _.info128, NAME>;
}
}
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- X86SchedWriteWidths sched> {
- defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
+ PatFrag OpNode_su, X86SchedWriteWidths sched> {
+ defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, OpNode_su, sched,
avx512vl_i32_info>;
- defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
+ defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, OpNode_su, sched,
avx512vl_i64_info>, VEX_W;
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
- PatFrag OpNode, X86SchedWriteWidths sched> {
+ PatFrag OpNode, PatFrag OpNode_su,
+ X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in {
- defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
+ defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, OpNode_su, sched.ZMM,
v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
- defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
+ defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, OpNode_su, sched.ZMM,
v64i8_info, NAME#"B">, EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
- defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
+ defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, OpNode_su, sched.YMM,
v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
- defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
+ defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, OpNode_su, sched.XMM,
v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
- defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
+ defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, OpNode_su, sched.YMM,
v32i8x_info, NAME#"B">, EVEX_V256;
- defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
+ defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, OpNode_su, sched.XMM,
v16i8x_info, NAME#"B">, EVEX_V128;
}
let Predicates = [HasBWI, NoVLX] in {
- defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
- defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
- defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
- defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
+ defm BZ256_Alt : avx512_vptest_lowering<OpNode, OpNode_su, v64i8_info, v32i8x_info, NAME#"B">;
+ defm BZ128_Alt : avx512_vptest_lowering<OpNode, OpNode_su, v64i8_info, v16i8x_info, NAME#"B">;
+ defm WZ256_Alt : avx512_vptest_lowering<OpNode, OpNode_su, v32i16_info, v16i16x_info, NAME#"W">;
+ defm WZ128_Alt : avx512_vptest_lowering<OpNode, OpNode_su, v32i16_info, v8i16x_info, NAME#"W">;
}
}
@@ -5889,19 +5976,29 @@ def X86pcmpeqm : PatFrag<(ops node:$src1
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
(setcc node:$src1, node:$src2, SETNE)>;
+def X86pcmpeqm_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86pcmpeqm node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+def X86pcmpnem_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86pcmpnem node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
- PatFrag OpNode, X86SchedWriteWidths sched> :
- avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
- avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
+ PatFrag OpNode, PatFrag OpNode_su,
+ X86SchedWriteWidths sched> :
+ avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, OpNode_su, sched>,
+ avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, OpNode_su, sched>;
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
- SchedWriteVecLogic>, T8PD;
+ X86pcmpnem_su, SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
- SchedWriteVecLogic>, T8XS;
+ X86pcmpeqm_su, SchedWriteVecLogic>, T8XS;
multiclass avx512_vptest_lowering_pats<string InstrStr, PatFrag OpNode,
- X86VectorVTInfo _,
+ PatFrag OpNode_su, X86VectorVTInfo _,
X86VectorVTInfo AndInfo> {
def : Pat<(_.KVT (OpNode (bitconvert
(AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
@@ -5909,9 +6006,9 @@ multiclass avx512_vptest_lowering_pats<s
(!cast<Instruction>(InstrStr # "rr") _.RC:$src1, _.RC:$src2)>;
def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV))),
+ (OpNode_su (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
(!cast<Instruction>(InstrStr # "rrk") _.KRC:$mask, _.RC:$src1,
_.RC:$src2)>;
@@ -5922,16 +6019,17 @@ multiclass avx512_vptest_lowering_pats<s
(!cast<Instruction>(InstrStr # "rm") _.RC:$src1, addr:$src2)>;
def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1,
- (AndInfo.LdFrag addr:$src2)))),
- _.ImmAllZerosV))),
+ (OpNode_su (bitconvert
+ (AndInfo.VT (and _.RC:$src1,
+ (AndInfo.LdFrag addr:$src2)))),
+                                     _.ImmAllZerosV))),
(!cast<Instruction>(InstrStr # "rmk") _.KRC:$mask, _.RC:$src1,
addr:$src2)>;
}
// Patterns to use 512-bit instructions when 128/256 are not available.
multiclass avx512_vptest_lowering_wide_pats<string InstrStr, PatFrag OpNode,
+ PatFrag OpNode_su,
X86VectorVTInfo _,
X86VectorVTInfo AndInfo,
X86VectorVTInfo ExtendInfo> {
@@ -5947,9 +6045,9 @@ multiclass avx512_vptest_lowering_wide_p
_.KRC))>;
def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV))),
+ (OpNode_su (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstrStr#"rrk")
(COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
@@ -5961,62 +6059,63 @@ multiclass avx512_vptest_lowering_wide_p
}
multiclass avx512_vptest_lowering_sizes<string InstrStr, PatFrag OpNode,
- Predicate prd,
+ PatFrag OpNode_su, Predicate prd,
AVX512VLVectorVTInfo CmpInfo,
AVX512VLVectorVTInfo AndInfo> {
let Predicates = [prd, HasVLX] in {
- defm : avx512_vptest_lowering_pats<InstrStr#"Z128", OpNode,
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z128", OpNode, OpNode_su,
CmpInfo.info128, AndInfo.info128>;
- defm : avx512_vptest_lowering_pats<InstrStr#"Z256", OpNode,
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z256", OpNode, OpNode_su,
CmpInfo.info256, AndInfo.info256>;
}
let Predicates = [prd] in {
- defm : avx512_vptest_lowering_pats<InstrStr#"Z", OpNode,
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z", OpNode, OpNode_su,
CmpInfo.info512, AndInfo.info512>;
}
let Predicates = [prd, NoVLX] in {
- defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
+ defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode, OpNode_su,
CmpInfo.info128, AndInfo.info128,
CmpInfo.info512>;
- defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
+ defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode, OpNode_su,
CmpInfo.info256, AndInfo.info256,
CmpInfo.info512>;
}
}
-multiclass avx512_vptest_lowering_types<string InstrStr, PatFrag OpNode> {
- defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+multiclass avx512_vptest_lowering_types<string InstrStr, PatFrag OpNode,
+ PatFrag OpNode_su> {
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, OpNode_su, HasBWI,
avx512vl_i8_info, avx512vl_i16_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, OpNode_su, HasBWI,
avx512vl_i8_info, avx512vl_i32_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, OpNode_su, HasBWI,
avx512vl_i8_info, avx512vl_i64_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, OpNode_su, HasBWI,
avx512vl_i16_info, avx512vl_i8_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, OpNode_su, HasBWI,
avx512vl_i16_info, avx512vl_i32_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, OpNode_su, HasBWI,
avx512vl_i16_info, avx512vl_i64_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, OpNode_su, HasAVX512,
avx512vl_i32_info, avx512vl_i8_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, OpNode_su, HasAVX512,
avx512vl_i32_info, avx512vl_i16_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, OpNode_su, HasAVX512,
avx512vl_i32_info, avx512vl_i64_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, OpNode_su, HasAVX512,
avx512vl_i64_info, avx512vl_i8_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, OpNode_su, HasAVX512,
avx512vl_i64_info, avx512vl_i16_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, OpNode_su, HasAVX512,
avx512vl_i64_info, avx512vl_i32_info>;
}
-defm : avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem>;
-defm : avx512_vptest_lowering_types<"VPTESTNM", X86pcmpeqm>;
+defm : avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem, X86pcmpnem_su>;
+defm : avx512_vptest_lowering_types<"VPTESTNM", X86pcmpeqm, X86pcmpeqm_su>;
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
@@ -12469,12 +12568,19 @@ defm VPOPCNTW : avx512_unary_rm_vl<0x54,
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
+def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86Vpshufbitqmb node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
(ins VTI.RC:$src1, VTI.RC:$src2),
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+ (VTI.VT VTI.RC:$src2)),
+ (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
(VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
Sched<[sched]>;
defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
@@ -12482,6 +12588,8 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSc
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+ (VTI.VT (VTI.LdFrag addr:$src2))),
+ (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
(VTI.VT (VTI.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
Sched<[sched.Folded, sched.ReadAfterFold]>;
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=358358&r1=358357&r2=358358&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Sun Apr 14 11:26:06 2019
@@ -9254,7 +9254,7 @@ define i8@test_int_x86_avx512_ptestm_d_1
; X86-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
+; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
@@ -9265,10 +9265,10 @@ define i8@test_int_x86_avx512_ptestm_d_1
; X64: # %bb.0:
; X64-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
-; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
-; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
+; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
+; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT: leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
@@ -9313,7 +9313,7 @@ define i8@test_int_x86_avx512_ptestm_q_1
; X86-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
+; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
@@ -9324,10 +9324,10 @@ define i8@test_int_x86_avx512_ptestm_q_1
; X64: # %bb.0:
; X64-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
-; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
-; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
+; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
+; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT: leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
@@ -9344,7 +9344,7 @@ define i8@test_int_x86_avx512_ptestm_q_2
; X86-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
+; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
@@ -9356,10 +9356,10 @@ define i8@test_int_x86_avx512_ptestm_q_2
; X64: # %bb.0:
; X64-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
-; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
-; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
+; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
+; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT: leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
@@ -9377,7 +9377,7 @@ define i8@test_int_x86_avx512_ptestnm_d_
; X86-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
+; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
@@ -9388,10 +9388,10 @@ define i8@test_int_x86_avx512_ptestnm_d_
; X64: # %bb.0:
; X64-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
-; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
-; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
+; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
+; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT: leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
@@ -9436,7 +9436,7 @@ define i8@test_int_x86_avx512_ptestnm_q_
; X86-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
+; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
@@ -9447,10 +9447,10 @@ define i8@test_int_x86_avx512_ptestnm_q_
; X64: # %bb.0:
; X64-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
-; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
-; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
+; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
+; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT: leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
@@ -9467,7 +9467,7 @@ define i8@test_int_x86_avx512_ptestnm_q_
; X86-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
+; X86-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
@@ -9479,10 +9479,10 @@ define i8@test_int_x86_avx512_ptestnm_q_
; X64: # %bb.0:
; X64-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
-; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
-; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
+; X64-NEXT: kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
+; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT: leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
Modified: llvm/trunk/test/CodeGen/X86/vec_uaddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_uaddo.ll?rev=358358&r1=358357&r2=358358&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_uaddo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_uaddo.ll Sun Apr 14 11:26:06 2019
@@ -1202,14 +1202,13 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a
; AVX512: # %bb.0:
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
-; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
-; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512-NEXT: kxorw %k1, %k0, %k2
-; AVX512-NEXT: kxnorw %k1, %k0, %k1
-; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1 {%k1}
+; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
+; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
+; AVX512-NEXT: kxorw %k1, %k0, %k1
+; AVX512-NEXT: kandnw %k0, %k1, %k2
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: kmovd %k2, %eax
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z}
+; AVX512-NEXT: kmovd %k1, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
%t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll?rev=358358&r1=358357&r2=358358&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll Sun Apr 14 11:26:06 2019
@@ -1246,7 +1246,7 @@ define i1 @bool_reduction_v16i8(<16 x i8
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; AVX512-NEXT: kshiftrw $8, %k0, %k1
-; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
+; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kshiftrw $4, %k0, %k1
; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kshiftrw $2, %k0, %k1
@@ -1436,7 +1436,7 @@ define i1 @bool_reduction_v16i16(<16 x i
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; AVX512-NEXT: kshiftrw $8, %k0, %k1
-; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
+; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kshiftrw $4, %k0, %k1
; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kshiftrw $2, %k0, %k1
@@ -1497,7 +1497,7 @@ define i1 @bool_reduction_v32i8(<32 x i8
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
; AVX512-NEXT: kshiftrd $16, %k0, %k1
-; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
+; AVX512-NEXT: kandd %k0, %k1, %k0
; AVX512-NEXT: kshiftrd $8, %k0, %k1
; AVX512-NEXT: kandd %k0, %k1, %k0
; AVX512-NEXT: kshiftrd $4, %k0, %k1
More information about the llvm-commits
mailing list