[llvm] r314133 - [AVX-512] Replace large number of explicit patterns that check for insert_subvector with zero after masked compares with fewer patterns with predicate
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 25 11:43:13 PDT 2017
Author: ctopper
Date: Mon Sep 25 11:43:13 2017
New Revision: 314133
URL: http://llvm.org/viewvc/llvm-project?rev=314133&view=rev
Log:
[AVX-512] Replace large number of explicit patterns that check for insert_subvector with zero after masked compares with fewer patterns with predicate
This replaces the large number of patterns that handle every possible case of zeroing after a masked compare with a few simpler patterns that use a predicate to check for a masked compare producer.
This is similar to what we do for detecting free GR32->GR64 zero extends and free xmm->ymm/zmm zero extends.
This shrinks the isel table from ~590k to ~531k, a reduction of roughly 10%.
Differential Revision: https://reviews.llvm.org/D38217
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=314133&r1=314132&r2=314133&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Sep 25 11:43:13 2017
@@ -442,10 +442,45 @@ namespace {
bool foldLoadStoreIntoMemOperand(SDNode *Node);
bool matchBEXTRFromAnd(SDNode *Node);
+
+ bool isMaskZeroExtended(SDNode *N) const;
};
}
+// Returns true if N is a PCMPEQM/PCMPGTM/CMPM/CMPMU mask compare that can be
+// implemented legally with its operand type on this subtarget.
+static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
+ if (N->getOpcode() == X86ISD::PCMPEQM ||
+ N->getOpcode() == X86ISD::PCMPGTM ||
+ N->getOpcode() == X86ISD::CMPM ||
+ N->getOpcode() == X86ISD::CMPMU) {
+ // We can get 256-bit 8 element types (v8i32/v8f32) here without VLX being
+ // enabled. When this happens we will use 512-bit operations and the mask
+ // will not be zero extended, so only report true when VLX is available.
+ if (N->getOperand(0).getValueType() == MVT::v8i32 ||
+ N->getOperand(0).getValueType() == MVT::v8f32)
+ return Subtarget->hasVLX();
+
+ return true;
+ }
+
+ return false;
+}
+
+// Returns true if we can assume the writer of mask N has zero extended it for
+// us: N is a legal mask compare, or an AND with one as either operand.
+bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
+ // If this is an AND, check if either immediate operand is a legal compare.
+ // As long as one side guarantees the mask is zero extended, the AND will
+ // preserve those zeros.
+ if (N->getOpcode() == ISD::AND)
+ return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
+ isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
+
+ return isLegalMaskCompare(N, Subtarget);
+}
+
bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (OptLevel == CodeGenOpt::None) return false;
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=314133&r1=314132&r2=314133&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Sep 25 11:43:13 2017
@@ -1866,217 +1866,6 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_v
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
- SDNode OpNode, string InstrStr,
- list<Predicate> Preds> {
-let Predicates = Preds in {
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
- _.RC:$src1, _.RC:$src2),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and (_.KVT _.KRCWM:$mask),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))))))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
- _.RC:$src1, addr:$src2),
- NewInf.KRC)>;
-}
-}
-
-multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
- SDNode OpNode, string InstrStr,
- list<Predicate> Preds>
- : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
-let Predicates = Preds in {
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and (_.KVT _.KRCWM:$mask),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
- _.RC:$src1, addr:$src2),
- NewInf.KRC)>;
-}
-}
-
-// VPCMPEQB - i8
-defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQBZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQBZ128", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQBZ256", [HasBWI, HasVLX]>;
-
-// VPCMPEQW - i16
-defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
- "VPCMPEQWZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQWZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQWZ128", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQWZ256", [HasBWI, HasVLX]>;
-defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQWZ256", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQWZ", [HasBWI]>;
-
-// VPCMPEQD - i32
-defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
- "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
- "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
- "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQDZ", [HasAVX512]>;
-defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQDZ", [HasAVX512]>;
-
-// VPCMPEQQ - i64
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
- "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
- "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
- "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
- "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
- "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
- "VPCMPEQQZ", [HasAVX512]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
- "VPCMPEQQZ", [HasAVX512]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
- "VPCMPEQQZ", [HasAVX512]>;
-
-// VPCMPGTB - i8
-defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTBZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTBZ128", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTBZ256", [HasBWI, HasVLX]>;
-
-// VPCMPGTW - i16
-defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
- "VPCMPGTWZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTWZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTWZ128", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTWZ256", [HasBWI, HasVLX]>;
-defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTWZ256", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTWZ", [HasBWI]>;
-
-// VPCMPGTD - i32
-defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
- "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
- "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
- "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTDZ", [HasAVX512]>;
-defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTDZ", [HasAVX512]>;
-
-// VPCMPGTQ - i64
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
- "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
- "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
- "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
- "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
- "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
- "VPCMPGTQZ", [HasAVX512]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
- "VPCMPGTQZ", [HasAVX512]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
- "VPCMPGTQZ", [HasAVX512]>;
-
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
let isCommutable = 1 in
@@ -2238,236 +2027,6 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
- SDNode OpNode, string InstrStr,
- list<Predicate> Preds> {
-let Predicates = Preds in {
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc)),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
- _.RC:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
- imm:$cc)),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
- addr:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
- _.RC:$src1,
- _.RC:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and (_.KVT _.KRCWM:$mask),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))),
- imm:$cc)))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
- _.RC:$src1,
- addr:$src2,
- imm:$cc),
- NewInf.KRC)>;
-}
-}
-
-multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
- SDNode OpNode, string InstrStr,
- list<Predicate> Preds>
- : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
-let Predicates = Preds in {
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
- imm:$cc)),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
- addr:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and (_.KVT _.KRCWM:$mask),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
- imm:$cc)))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
- _.RC:$src1,
- addr:$src2,
- imm:$cc),
- NewInf.KRC)>;
-}
-}
-
-// VPCMPB - i8
-defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
- "VPCMPBZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
- "VPCMPBZ128", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
- "VPCMPBZ256", [HasBWI, HasVLX]>;
-
-// VPCMPW - i16
-defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
- "VPCMPWZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
- "VPCMPWZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
- "VPCMPWZ128", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
- "VPCMPWZ256", [HasBWI, HasVLX]>;
-defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
- "VPCMPWZ256", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
- "VPCMPWZ", [HasBWI]>;
-
-// VPCMPD - i32
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
- "VPCMPDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
- "VPCMPDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
- "VPCMPDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
- "VPCMPDZ128", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
- "VPCMPDZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
- "VPCMPDZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
- "VPCMPDZ256", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
- "VPCMPDZ", [HasAVX512]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
- "VPCMPDZ", [HasAVX512]>;
-
-// VPCMPQ - i64
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
- "VPCMPQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
- "VPCMPQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
- "VPCMPQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
- "VPCMPQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
- "VPCMPQZ128", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
- "VPCMPQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
- "VPCMPQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
- "VPCMPQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
- "VPCMPQZ256", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
- "VPCMPQZ", [HasAVX512]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
- "VPCMPQZ", [HasAVX512]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
- "VPCMPQZ", [HasAVX512]>;
-
-// VPCMPUB - i8
-defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
- "VPCMPUBZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
- "VPCMPUBZ128", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
- "VPCMPUBZ256", [HasBWI, HasVLX]>;
-
-// VPCMPUW - i16
-defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
- "VPCMPUWZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
- "VPCMPUWZ128", [HasBWI, HasVLX]>;
-defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
- "VPCMPUWZ128", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
- "VPCMPUWZ256", [HasBWI, HasVLX]>;
-defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
- "VPCMPUWZ256", [HasBWI, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
- "VPCMPUWZ", [HasBWI]>;
-
-// VPCMPUD - i32
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
- "VPCMPUDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
- "VPCMPUDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
- "VPCMPUDZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
- "VPCMPUDZ128", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
- "VPCMPUDZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
- "VPCMPUDZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
- "VPCMPUDZ256", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
- "VPCMPUDZ", [HasAVX512]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
- "VPCMPUDZ", [HasAVX512]>;
-
-// VPCMPUQ - i64
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
- "VPCMPUQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
- "VPCMPUQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
- "VPCMPUQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
- "VPCMPUQZ128", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
- "VPCMPUQZ128", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
- "VPCMPUQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
- "VPCMPUQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
- "VPCMPUQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
- "VPCMPUQZ256", [HasAVX512, HasVLX]>;
-
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
- "VPCMPUQZ", [HasAVX512]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
- "VPCMPUQZ", [HasAVX512]>;
-defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
- "VPCMPUQZ", [HasAVX512]>;
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
@@ -2559,159 +2118,6 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_i
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
-multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
- string InstrStr, list<Predicate> Preds> {
-let Predicates = Preds in {
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (X86cmpm (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc)),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
- _.RC:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and _.KRCWM:$mask,
- (X86cmpm (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
- _.RC:$src1,
- _.RC:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (X86cmpm (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
- imm:$cc)),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
- addr:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and _.KRCWM:$mask,
- (X86cmpm (_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))),
- imm:$cc))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
- _.RC:$src1,
- addr:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (X86cmpm (_.VT _.RC:$src1),
- (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
- imm:$cc)),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
- addr:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and _.KRCWM:$mask,
- (X86cmpm (_.VT _.RC:$src1),
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
- imm:$cc))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbik) _.KRCWM:$mask,
- _.RC:$src1,
- addr:$src2,
- imm:$cc),
- NewInf.KRC)>;
-}
-}
-
-multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
- string InstrStr, list<Predicate> Preds>
- : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
-
-let Predicates = Preds in
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc,
- (i32 FROUND_NO_EXC))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
- _.RC:$src2,
- imm:$cc),
- NewInf.KRC)>;
-
- def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and _.KRCWM:$mask,
- (X86cmpmRnd (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc,
- (i32 FROUND_NO_EXC)))),
- (i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rribk) _.KRCWM:$mask,
- _.RC:$src1,
- _.RC:$src2,
- imm:$cc),
- NewInf.KRC)>;
-}
-
-
-// VCMPPS - f32
-defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
- [HasAVX512, HasVLX]>;
-
-defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
- [HasAVX512, HasVLX]>;
-
-defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
- [HasAVX512]>;
-defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
- [HasAVX512]>;
-
-// VCMPPD - f64
-defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
- [HasAVX512, HasVLX]>;
-
-defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
- [HasAVX512, HasVLX]>;
-defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
- [HasAVX512, HasVLX]>;
-
-defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
- [HasAVX512]>;
-defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
- [HasAVX512]>;
-defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
- [HasAVX512]>;
// ----------------------------------------------------------------
// FPClass
@@ -3211,24 +2617,6 @@ def : Pat<(v8i1 (and VK8:$mask,
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
VK8)>;
-
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
- (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (i64 0)),
- (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
- (i8 8)), (i8 8))>;
-
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
- (v8i1 (and VK8:$mask,
- (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
- (i64 0)),
- (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
- (COPY_TO_REGCLASS VK8:$mask, VK16),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
- (i8 8)), (i8 8))>;
}
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
@@ -3246,26 +2634,6 @@ def : Pat<(v8i1 (and VK8:$mask, (OpNode
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
-
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
- (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
- (i64 0)),
- (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
- (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc),
- (i8 8)), (i8 8))>;
-
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
- (v8i1 (and VK8:$mask,
- (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
- (i64 0)),
- (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
- (COPY_TO_REGCLASS VK8:$mask, VK16),
- (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc),
- (i8 8)), (i8 8))>;
}
let Predicates = [HasAVX512, NoVLX] in {
Modified: llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td?rev=314133&r1=314132&r2=314133&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td Mon Sep 25 11:43:13 2017
@@ -419,3 +419,84 @@ defm: subvector_zero_ellision<VR256, v8i
defm: subvector_zero_ellision<VR256, v16i32, v8i32, v16i32, sub_ymm, zeroupperv8i32>;
defm: subvector_zero_ellision<VR256, v32i16, v16i16, v16i32, sub_ymm, zeroupperv16i16>;
defm: subvector_zero_ellision<VR256, v64i8, v32i8, v16i32, sub_ymm, zeroupperv32i8>;
+
+
+class maskzeroupper<ValueType vt, RegisterClass RC> :
+ PatLeaf<(vt RC:$src), [{
+ return isMaskZeroExtended(N);
+ }]>;
+
+def maskzeroupperv2i1 : maskzeroupper<v2i1, VK2>;
+def maskzeroupperv4i1 : maskzeroupper<v4i1, VK4>;
+def maskzeroupperv8i1 : maskzeroupper<v8i1, VK8>;
+def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
+def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;
+
+// These patterns detect when the upper bits of a mask register are already
+// zeroed by the operation that produced it, so that we can skip explicit
+// zeroing.
+let Predicates = [HasBWI] in {
+ def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+ maskzeroupperv8i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK8:$src, VK32)>;
+ def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+ maskzeroupperv16i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK16:$src, VK32)>;
+ def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+ maskzeroupperv8i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK8:$src, VK64)>;
+ def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+ maskzeroupperv16i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK16:$src, VK64)>;
+ def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+ maskzeroupperv32i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK32:$src, VK64)>;
+}
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+ maskzeroupperv8i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK8:$src, VK16)>;
+}
+
+let Predicates = [HasVLX] in {
+ def : Pat<(v4i1 (insert_subvector (v4i1 immAllZerosV),
+ maskzeroupperv2i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK2:$src, VK4)>;
+ def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
+ maskzeroupperv2i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK2:$src, VK8)>;
+ def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
+ maskzeroupperv4i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK4:$src, VK8)>;
+ def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+ maskzeroupperv2i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK2:$src, VK16)>;
+ def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+ maskzeroupperv4i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK4:$src, VK16)>;
+}
+
+let Predicates = [HasBWI, HasVLX] in {
+ def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+ maskzeroupperv2i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK2:$src, VK32)>;
+ def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+ maskzeroupperv4i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK4:$src, VK32)>;
+ def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+ maskzeroupperv2i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK2:$src, VK64)>;
+ def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+ maskzeroupperv4i1:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK4:$src, VK64)>;
+}
+
+// If the upper bits are not known to be zero we have to fall back to
+// explicitly zeroing them by using shifts.
+let Predicates = [HasAVX512, NoVLX] in {
+ def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+ (v8i1 VK8:$mask), (iPTR 0))),
+ (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
+ (i8 8)), (i8 8))>;
+}
More information about the llvm-commits
mailing list