[llvm] 9201efb - [X86] Custom match X86ISD::VPTERNLOG in X86ISelDAGToDAG in order to reduce isel patterns.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 10 23:33:40 PDT 2020
Author: Craig Topper
Date: 2020-08-10T23:15:58-07:00
New Revision: 9201efb3b96e5ae229268aba8576832e80acef71
URL: https://github.com/llvm/llvm-project/commit/9201efb3b96e5ae229268aba8576832e80acef71
DIFF: https://github.com/llvm/llvm-project/commit/9201efb3b96e5ae229268aba8576832e80acef71.diff
LOG: [X86] Custom match X86ISD::VPTERNLOG in X86ISelDAGToDAG in order to reduce isel patterns.
By factoring out the end of tryVPTERNLOG, we can use the same code
to directly match X86ISD::VPTERNLOG. This allows us to remove
around 3-4K worth of X86GenDAGISel.inc.
Added:
Modified:
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 44fb0cd49ef5..7a5ae2c8a04c 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -499,6 +499,8 @@ namespace {
bool tryShiftAmountMod(SDNode *N);
bool tryShrinkShlLogicImm(SDNode *N);
bool tryVPTERNLOG(SDNode *N);
+ bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentBC,
+ SDValue A, SDValue B, SDValue C, uint8_t Imm);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
bool tryMatchBitSelect(SDNode *N);
@@ -3929,78 +3931,12 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
return true;
}
-// Try to match two logic ops to a VPTERNLOG.
-// FIXME: Handle inverted inputs?
-// FIXME: Handle more complex patterns that use an operand more than once?
-bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
- MVT NVT = N->getSimpleValueType(0);
-
- // Make sure we support VPTERNLOG.
- if (!NVT.isVector() || !Subtarget->hasAVX512() ||
- NVT.getVectorElementType() == MVT::i1)
- return false;
-
- // We need VLX for 128/256-bit.
- if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
- return false;
-
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- auto getFoldableLogicOp = [](SDValue Op) {
- // Peek through single use bitcast.
- if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
- Op = Op.getOperand(0);
-
- if (!Op.hasOneUse())
- return SDValue();
-
- unsigned Opc = Op.getOpcode();
- if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
- Opc == X86ISD::ANDNP)
- return Op;
-
- return SDValue();
- };
-
- SDValue A, FoldableOp;
- if ((FoldableOp = getFoldableLogicOp(N1))) {
- A = N0;
- } else if ((FoldableOp = getFoldableLogicOp(N0))) {
- A = N1;
- } else
- return false;
-
- SDValue B = FoldableOp.getOperand(0);
- SDValue C = FoldableOp.getOperand(1);
-
- // We can build the appropriate control immediate by performing the logic
- // operation we're matching using these constants for A, B, and C.
- const uint8_t TernlogMagicA = 0xf0;
- const uint8_t TernlogMagicB = 0xcc;
- const uint8_t TernlogMagicC = 0xaa;
-
- uint8_t Imm;
- switch (FoldableOp.getOpcode()) {
- default: llvm_unreachable("Unexpected opcode!");
- case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
- case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break;
- case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;
- case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
- }
-
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected opcode!");
- case X86ISD::ANDNP:
- if (A == N0)
- Imm &= ~TernlogMagicA;
- else
- Imm = ~(Imm) & TernlogMagicA;
- break;
- case ISD::AND: Imm &= TernlogMagicA; break;
- case ISD::OR: Imm |= TernlogMagicA; break;
- case ISD::XOR: Imm ^= TernlogMagicA; break;
- }
+bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
+ SDNode *ParentBC, SDValue A, SDValue B,
+ SDValue C, uint8_t Imm) {
+ assert(A.isOperandOf(ParentA));
+ assert(B.isOperandOf(ParentBC));
+ assert(C.isOperandOf(ParentBC));
auto tryFoldLoadOrBCast =
[this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
@@ -4028,10 +3964,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
bool FoldedLoad = false;
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (tryFoldLoadOrBCast(N, FoldableOp.getNode(), C, Tmp0, Tmp1, Tmp2, Tmp3,
- Tmp4)) {
+ if (tryFoldLoadOrBCast(Root, ParentBC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
FoldedLoad = true;
- } else if (tryFoldLoadOrBCast(N, N, A, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
+ Tmp4)) {
FoldedLoad = true;
std::swap(A, C);
// Swap bits 1/4 and 3/6.
@@ -4041,8 +3977,8 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
if (OldImm & 0x10) Imm |= 0x02;
if (OldImm & 0x08) Imm |= 0x40;
if (OldImm & 0x40) Imm |= 0x08;
- } else if (tryFoldLoadOrBCast(N, FoldableOp.getNode(), B, Tmp0, Tmp1, Tmp2,
- Tmp3, Tmp4)) {
+ } else if (tryFoldLoadOrBCast(Root, ParentBC, B, Tmp0, Tmp1, Tmp2, Tmp3,
+ Tmp4)) {
FoldedLoad = true;
std::swap(B, C);
// Swap bits 1/2 and 5/6.
@@ -4054,10 +3990,12 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
if (OldImm & 0x40) Imm |= 0x20;
}
- SDLoc DL(N);
+ SDLoc DL(Root);
SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);
+ MVT NVT = Root->getSimpleValueType(0);
+
MachineSDNode *MNode;
if (FoldedLoad) {
SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
@@ -4111,11 +4049,87 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
}
- ReplaceUses(SDValue(N, 0), SDValue(MNode, 0));
- CurDAG->RemoveDeadNode(N);
+ ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0));
+ CurDAG->RemoveDeadNode(Root);
return true;
}
+// Try to match two logic ops to a VPTERNLOG.
+// FIXME: Handle inverted inputs?
+// FIXME: Handle more complex patterns that use an operand more than once?
+bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
+ MVT NVT = N->getSimpleValueType(0);
+
+ // Make sure we support VPTERNLOG.
+ if (!NVT.isVector() || !Subtarget->hasAVX512() ||
+ NVT.getVectorElementType() == MVT::i1)
+ return false;
+
+ // We need VLX for 128/256-bit.
+ if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ auto getFoldableLogicOp = [](SDValue Op) {
+ // Peek through single use bitcast.
+ if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
+ Op = Op.getOperand(0);
+
+ if (!Op.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
+ Opc == X86ISD::ANDNP)
+ return Op;
+
+ return SDValue();
+ };
+
+ SDValue A, FoldableOp;
+ if ((FoldableOp = getFoldableLogicOp(N1))) {
+ A = N0;
+ } else if ((FoldableOp = getFoldableLogicOp(N0))) {
+ A = N1;
+ } else
+ return false;
+
+ SDValue B = FoldableOp.getOperand(0);
+ SDValue C = FoldableOp.getOperand(1);
+
+ // We can build the appropriate control immediate by performing the logic
+ // operation we're matching using these constants for A, B, and C.
+ const uint8_t TernlogMagicA = 0xf0;
+ const uint8_t TernlogMagicB = 0xcc;
+ const uint8_t TernlogMagicC = 0xaa;
+
+ uint8_t Imm;
+ switch (FoldableOp.getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
+ case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break;
+ case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;
+ case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
+ }
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86ISD::ANDNP:
+ if (A == N0)
+ Imm &= ~TernlogMagicA;
+ else
+ Imm = ~(Imm) & TernlogMagicA;
+ break;
+ case ISD::AND: Imm &= TernlogMagicA; break;
+ case ISD::OR: Imm |= TernlogMagicA; break;
+ case ISD::XOR: Imm ^= TernlogMagicA; break;
+ }
+
+ return matchVPTERNLOG(N, N, FoldableOp.getNode(), A, B, C, Imm);
+}
+
/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
@@ -4447,8 +4461,9 @@ bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
ReplaceNode(N, Ternlog.getNode());
- SelectCode(Ternlog.getNode());
- return true;
+
+ return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
+ A, B, C, 0xCA);
}
void X86DAGToDAGISel::Select(SDNode *Node) {
@@ -4598,6 +4613,14 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
break;
+ case X86ISD::VPTERNLOG: {
+ uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
+ if (matchVPTERNLOG(Node, Node, Node, Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2), Imm))
+ return;
+ break;
+ }
+
case X86ISD::ANDNP:
if (tryVPTERNLOG(Node))
return;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 639c9260fb0a..f9582238d30f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -11253,17 +11253,6 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
- // Additional patterns for matching loads in other positions.
- def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
- (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (OpNode _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src3)),
- _.RC:$src2, (i8 timm:$src4))),
- (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
-
// Additional patterns for matching zero masking with loads in other
// positions.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
@@ -11312,17 +11301,6 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
- // Additional patterns for matching broadcasts in other positions.
- def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
- _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
- (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (OpNode _.RC:$src1,
- (_.BroadcastLdFrag addr:$src3),
- _.RC:$src2, (i8 timm:$src4))),
- (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
- addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
-
// Additional patterns for matching zero masking with broadcasts in other
// positions.
def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
@@ -11394,398 +11372,6 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
avx512vl_i64_info>, VEX_W;
-// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
-let Predicates = [HasVLX] in {
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
- timm:$src4)>;
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (loadv16i8 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
- VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
- timm:$src4)>;
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (loadv8i16 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
- VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
- (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
- (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
- VR128X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
- timm:$src4)>;
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (loadv32i8 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
- VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
- timm:$src4)>;
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (loadv16i16 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
- VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
- (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
- (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
- VR256X:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-}
-
-let Predicates = [HasAVX512] in {
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
- timm:$src4)>;
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
- (loadv64i8 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
- VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v64i8 (X86vpternlog VR512:$src1,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v64i8 (X86vpternlog VR512:$src1,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
- (i8 timm:$src4))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
- (loadv32i16 addr:$src3), (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
- VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v32i16 (X86vpternlog VR512:$src1,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v16i32 (X86vpternlog VR512:$src1,
- (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-
- def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- timm:$src4)>;
- def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, VR512:$src1, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(v8i64 (X86vpternlog VR512:$src1,
- (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
- VR512:$src2, (i8 timm:$src4))),
- (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
- (VPTERNLOG132_imm8 timm:$src4))>;
-}
-
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
More information about the llvm-commits mailing list