[llvm] r323612 - [X86] Remove VPTESTM/VPTESTNM ISD opcodes. Use isel patterns matching cmpm eq/ne with immallzeros.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 27 16:56:30 PST 2018
Author: ctopper
Date: Sat Jan 27 16:56:30 2018
New Revision: 323612
URL: http://llvm.org/viewvc/llvm-project?rev=323612&view=rev
Log:
[X86] Remove VPTESTM/VPTESTNM ISD opcodes. Use isel patterns matching cmpm eq/ne with immallzeros.
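For reference, this is roughly the kind of IR that exercises the change (the function name is hypothetical): an integer compare against zero used to lower through the dedicated X86ISD::TESTNM node, and it now reaches isel as X86ISD::CMPM with an eq predicate against an all-zeros vector, where the new patterns still select vptestnm when both inputs are in registers.

    ; minimal sketch, assuming an AVX-512 target; function name made up for illustration
    define <16 x i1> @cmp_zero(<16 x i32> %a) {
      %c = icmp eq <16 x i32> %a, zeroinitializer   ; was X86ISD::TESTNM, now CMPM eq vs. zero
      ret <16 x i1> %c
    }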
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll
llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=323612&r1=323611&r2=323612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Sat Jan 27 16:56:30 2018
@@ -451,8 +451,7 @@ namespace {
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
unsigned Opcode = N->getOpcode();
- if (Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
- Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU ||
+ if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMU ||
Opcode == X86ISD::CMPM_RND) {
// We can get 256-bit 8 element types here without VLX being enabled. When
// this happens we will use 512-bit operations and the mask will not be
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=323612&r1=323611&r2=323612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 27 16:56:30 2018
@@ -5043,8 +5043,6 @@ static bool isMaskedZeroUpperBitsvXi1(un
switch (Opcode) {
default:
return false;
- case X86ISD::TESTM:
- case X86ISD::TESTNM:
case X86ISD::CMPM:
case X86ISD::CMPMU:
case X86ISD::CMPM_RND:
@@ -14639,9 +14637,11 @@ SDValue X86TargetLowering::LowerVSELECT(
assert(Cond.getValueType().getScalarSizeInBits() ==
VT.getScalarSizeInBits() &&
"Should have a size-matched integer condition!");
- // Build a mask by testing the condition against itself (tests for zero).
+ // Build a mask by testing the condition against zero.
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
- SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond);
+ SDValue Mask = DAG.getNode(X86ISD::CMPM, dl, MaskVT, Cond,
+ getZeroVector(VT, Subtarget, DAG, dl),
+ DAG.getConstant(4, dl, MVT::i8));
// Now return a new VSELECT using the mask.
return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
}
@@ -16609,7 +16609,9 @@ static SDValue LowerTruncateVecI1(SDValu
In = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
}
- return DAG.getNode(X86ISD::TESTM, DL, VT, In, In);
+ return DAG.getNode(X86ISD::CMPM, DL, VT, In,
+ getZeroVector(InVT, Subtarget, DAG, DL),
+ DAG.getConstant(4, DL, MVT::i8));
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@@ -17766,26 +17768,6 @@ static SDValue LowerIntVSETCC_AVX512(SDV
if (Swap)
std::swap(Op0, Op1);
- // See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM.
- if (SSECC == 4 || SSECC == 0) {
- SDValue A = peekThroughBitcasts(Op0);
- if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) &&
- ISD::isBuildVectorAllZeros(Op1.getNode())) {
- MVT VT0 = Op0.getSimpleValueType();
- SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0));
- SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1));
- return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
- dl, VT, RHS, LHS);
- }
-
- // If this is just a comparison with 0 without an AND, we can just use
- // the same input twice to avoid creating a zero vector.
- if (ISD::isBuildVectorAllZeros(Op1.getNode())) {
- return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
- dl, VT, Op0, Op0);
- }
- }
-
unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) ? X86ISD::CMPMU
: X86ISD::CMPM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
@@ -25365,8 +25347,6 @@ const char *X86TargetLowering::getTarget
case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
- case X86ISD::TESTM: return "X86ISD::TESTM";
- case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL";
@@ -37674,28 +37654,6 @@ static SDValue combineVSZext(SDNode *N,
return SDValue();
}
-static SDValue combineTestM(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
-
- MVT VT = N->getSimpleValueType(0);
- SDLoc DL(N);
-
- // TEST (AND a, b) ,(AND a, b) -> TEST a, b
- if (Op0 == Op1 && Op1->getOpcode() == ISD::AND)
- return DAG.getNode(X86ISD::TESTM, DL, VT, Op0->getOperand(0),
- Op0->getOperand(1));
-
- // TEST op0, BUILD_VECTOR(all_zero) -> BUILD_VECTOR(all_zero)
- // TEST BUILD_VECTOR(all_zero), op1 -> BUILD_VECTOR(all_zero)
- if (ISD::isBuildVectorAllZeros(Op0.getNode()) ||
- ISD::isBuildVectorAllZeros(Op1.getNode()))
- return getZeroVector(VT, Subtarget, DAG, DL);
-
- return SDValue();
-}
-
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
@@ -38001,7 +37959,6 @@ SDValue X86TargetLowering::PerformDAGCom
case X86ISD::MSCATTER:
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget);
- case X86ISD::TESTM: return combineTestM(N, DAG, Subtarget);
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=323612&r1=323611&r2=323612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Jan 27 16:56:30 2018
@@ -368,10 +368,6 @@ namespace llvm {
// Vector packed fp sign bitwise comparisons.
TESTP,
- // Vector "test" in AVX-512, the result is in a mask vector.
- TESTM,
- TESTNM,
-
// OR/AND test for masks.
KORTEST,
KTEST,
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=323612&r1=323611&r2=323612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Jan 27 16:56:30 2018
@@ -2084,6 +2084,8 @@ multiclass avx512_icmp_packed_rmb_vl<bit
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 0))>;
+def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
+ (X86cmpm node:$src1, node:$src2, (i8 4))>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 6))>;
@@ -5197,42 +5199,57 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins, X86VectorVTInfo _> {
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+ OpndItins itins, X86VectorVTInfo _, string Suffix> {
let ExeDomain = _.ExeDomain in {
let isCommutable = 1 in
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
+ (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV), itins.rr>,
EVEX_4V, Sched<[itins.Sched]>;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ (OpNode (bitconvert
+ (_.i64VT (and _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src2))))),
+ _.ImmAllZerosV),
+ itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+
+ // Patterns for compare with 0 that just use the same source twice.
+ def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
+ (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rr")
+ _.RC:$src, _.RC:$src))>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
+ (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rrk")
+ _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
-multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
- (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))),
+ (OpNode (and _.RC:$src1,
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))),
+ _.ImmAllZerosV),
itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
-multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
+multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _, string Suffix> {
- def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
+ def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(NAME # Suffix # "Zrr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
@@ -5242,7 +5259,8 @@ multiclass avx512_vptest_lowering<SDNode
_.KRC))>;
def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
+ (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME # Suffix # "Zrrk")
(COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
@@ -5251,19 +5269,38 @@ multiclass avx512_vptest_lowering<SDNode
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src2, _.SubRegIdx)),
_.KRC)>;
+
+ def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
+ (_.KVT (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME # Suffix # "Zrr")
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src, _.SubRegIdx),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src, _.SubRegIdx)),
+ _.KRC))>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME # Suffix # "Zrrk")
+ (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src, _.SubRegIdx),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src, _.SubRegIdx)),
+ _.KRC)>;
}
-multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins, AVX512VLVectorVTInfo _,
string Suffix> {
let Predicates = [HasAVX512] in
- defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
+ defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
+ defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>, EVEX_V256;
- defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
+ defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
@@ -5272,7 +5309,7 @@ multiclass avx512_vptest_dq_sizes<bits<8
}
}
-multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
OpndItins itins> {
defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
avx512vl_i32_info, "D">;
@@ -5281,41 +5318,41 @@ multiclass avx512_vptest_dq<bits<8> opc,
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
- SDNode OpNode, OpndItins itins> {
+ PatFrag OpNode, OpndItins itins> {
let Predicates = [HasBWI] in {
- defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
+ defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info, "W">,
EVEX_V512, VEX_W;
- defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
+ defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info, "B">,
EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
- defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
+ defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info, "W">,
EVEX_V256, VEX_W;
- defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
+ defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info, "W">,
EVEX_V128, VEX_W;
- defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
+ defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info, "B">,
EVEX_V256;
- defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
+ defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info, "B">,
EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
- defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
- defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
- defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
+ defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, "B">;
+ defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, "B">;
+ defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, "W">;
+ defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, "W">;
}
}
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
- SDNode OpNode, OpndItins itins> :
+ PatFrag OpNode, OpndItins itins> :
avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
-defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
+defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
SSE_BIT_ITINS_P>, T8PD;
-defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
+defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
SSE_BIT_ITINS_P>, T8XS;
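As a rough illustration of the masked "rrk" patterns added above (not taken from the commit itself; the function name is hypothetical), IR of the following shape should still fold the mask 'and' into the zero-masked vptestm form:

    ; minimal sketch, assuming an AVX-512 target
    define <16 x i1> @masked_test(<16 x i32> %a, <16 x i1> %m) {
      %c = icmp ne <16 x i32> %a, zeroinitializer   ; CMPM ne vs. zero, i.e. the vptestm pattern
      %r = and <16 x i1> %m, %c                     ; covered by the new masked ("rrk") pattern
      ret <16 x i1> %r
    }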
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=323612&r1=323611&r2=323612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat Jan 27 16:56:30 2018
@@ -234,10 +234,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
-def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>,
- SDTCisSameNumEltsAs<0, 1>]>;
-
def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
@@ -248,8 +244,6 @@ def X86ptest : SDNode<"X86ISD::PTEST",
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
-def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>;
-def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>;
def X86movmsk : SDNode<"X86ISD::MOVMSK",
SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>;
Modified: llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll?rev=323612&r1=323611&r2=323612&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll Sat Jan 27 16:56:30 2018
@@ -7,8 +7,8 @@
define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_sext_v8i16:
; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@@ -17,8 +17,8 @@ define <8 x i16> @testv8i1_sext_v8i16(<8
;
; AVX512VL-LABEL: testv8i1_sext_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
@@ -42,10 +42,9 @@ define <8 x i16> @testv8i1_sext_v8i16(<8
define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i8:
; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256-NEXT: vmovdqa (%rsi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
@@ -59,10 +58,9 @@ define <16 x i8> @testv16i1_sext_v16i8(<
;
; AVX512VL-LABEL: testv16i1_sext_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@@ -92,10 +90,9 @@ define <16 x i8> @testv16i1_sext_v16i8(<
define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i16:
; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256-NEXT: vmovdqa (%rsi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
@@ -106,10 +103,9 @@ define <16 x i16> @testv16i1_sext_v16i16
;
; AVX512VL-LABEL: testv16i1_sext_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
@@ -137,8 +133,8 @@ define <16 x i16> @testv16i1_sext_v16i16
define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_zext_v8i16:
; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
; AVX256-NEXT: vzeroupper
@@ -146,8 +142,8 @@ define <8 x i16> @testv8i1_zext_v8i16(<8
;
; AVX512VL-LABEL: testv8i1_zext_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
@@ -170,10 +166,9 @@ define <8 x i16> @testv8i1_zext_v8i16(<8
define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i8:
; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256-NEXT: vmovdqa (%rsi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT: movl {{.*}}(%rip), %eax
; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@@ -188,10 +183,9 @@ define <16 x i8> @testv16i1_zext_v16i8(<
;
; AVX512VL-LABEL: testv16i1_zext_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@@ -221,10 +215,9 @@ define <16 x i8> @testv16i1_zext_v16i8(<
define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i16:
; AVX256: # %bb.0:
-; AVX256-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256-NEXT: vmovdqa (%rsi), %ymm0
-; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
+; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT: movl {{.*}}(%rip), %eax
; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
@@ -235,10 +228,9 @@ define <16 x i16> @testv16i1_zext_v16i16
;
; AVX512VL-LABEL: testv16i1_zext_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll?rev=323612&r1=323611&r2=323612&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll Sat Jan 27 16:56:30 2018
@@ -11,10 +11,9 @@
define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8 x i32>* %b) {
; AVX256VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX256VL: # %bb.0:
-; AVX256VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256VL-NEXT: vmovdqa (%rsi), %ymm1
-; AVX256VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX256VL-NEXT: vptestnmd %ymm1, %ymm1, %k2
+; AVX256VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX256VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX256VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1
@@ -45,10 +44,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7
;
; AVX512VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX512VL-NEXT: vptestnmd %ymm1, %ymm1, %k2
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
@@ -61,10 +59,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7
;
; AVX256VLBW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX256VLBW: # %bb.0:
-; AVX256VLBW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX256VLBW-NEXT: vmovdqa (%rsi), %ymm1
-; AVX256VLBW-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX256VLBW-NEXT: vptestnmd %ymm1, %ymm1, %k1
+; AVX256VLBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX256VLBW-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; AVX256VLBW-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX256VLBW-NEXT: vpmovm2w %k1, %ymm0
; AVX256VLBW-NEXT: vpmovm2w %k0, %ymm1
; AVX256VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
@@ -76,10 +73,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7
;
; AVX512VLBW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VLBW-NEXT: vmovdqa (%rsi), %ymm1
-; AVX512VLBW-NEXT: vptestnmd %ymm0, %ymm0, %k1
-; AVX512VLBW-NEXT: vptestnmd %ymm1, %ymm1, %k2
+; AVX512VLBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
+; AVX512VLBW-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
; AVX512VLBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VLBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]