[llvm] 9201efb - [X86] Custom match X86ISD::VPTERNLOG in X86ISelDAGToDAG in order to reduce isel patterns.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 10 23:33:40 PDT 2020


Author: Craig Topper
Date: 2020-08-10T23:15:58-07:00
New Revision: 9201efb3b96e5ae229268aba8576832e80acef71

URL: https://github.com/llvm/llvm-project/commit/9201efb3b96e5ae229268aba8576832e80acef71
DIFF: https://github.com/llvm/llvm-project/commit/9201efb3b96e5ae229268aba8576832e80acef71.diff

LOG: [X86] Custom match X86ISD::VPTERNLOG in X86ISelDAGToDAG in order to reduce isel patterns.

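By factoring the tail of tryVPTERNLOG (the load/broadcast folding and
machine node creation) out into a new matchVPTERNLOG helper, we can use
the same code to directly match X86ISD::VPTERNLOG nodes. This allows us
to remove the corresponding isel patterns, which shrinks
X86GenDAGISel.inc by around 3-4K.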

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/lib/Target/X86/X86InstrAVX512.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 44fb0cd49ef5..7a5ae2c8a04c 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -499,6 +499,8 @@ namespace {
     bool tryShiftAmountMod(SDNode *N);
     bool tryShrinkShlLogicImm(SDNode *N);
     bool tryVPTERNLOG(SDNode *N);
+    bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentBC,
+                        SDValue A, SDValue B, SDValue C, uint8_t Imm);
     bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
     bool tryMatchBitSelect(SDNode *N);
 
@@ -3929,78 +3931,12 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
   return true;
 }
 
-// Try to match two logic ops to a VPTERNLOG.
-// FIXME: Handle inverted inputs?
-// FIXME: Handle more complex patterns that use an operand more than once?
-bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
-  MVT NVT = N->getSimpleValueType(0);
-
-  // Make sure we support VPTERNLOG.
-  if (!NVT.isVector() || !Subtarget->hasAVX512() ||
-      NVT.getVectorElementType() == MVT::i1)
-    return false;
-
-  // We need VLX for 128/256-bit.
-  if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
-    return false;
-
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-
-  auto getFoldableLogicOp = [](SDValue Op) {
-    // Peek through single use bitcast.
-    if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
-      Op = Op.getOperand(0);
-
-    if (!Op.hasOneUse())
-      return SDValue();
-
-    unsigned Opc = Op.getOpcode();
-    if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
-        Opc == X86ISD::ANDNP)
-      return Op;
-
-    return SDValue();
-  };
-
-  SDValue A, FoldableOp;
-  if ((FoldableOp = getFoldableLogicOp(N1))) {
-    A = N0;
-  } else if ((FoldableOp = getFoldableLogicOp(N0))) {
-    A = N1;
-  } else
-    return false;
-
-  SDValue B = FoldableOp.getOperand(0);
-  SDValue C = FoldableOp.getOperand(1);
-
-  // We can build the appropriate control immediate by performing the logic
-  // operation we're matching using these constants for A, B, and C.
-  const uint8_t TernlogMagicA = 0xf0;
-  const uint8_t TernlogMagicB = 0xcc;
-  const uint8_t TernlogMagicC = 0xaa;
-
-  uint8_t Imm;
-  switch (FoldableOp.getOpcode()) {
-  default: llvm_unreachable("Unexpected opcode!");
-  case ISD::AND:      Imm = TernlogMagicB & TernlogMagicC; break;
-  case ISD::OR:       Imm = TernlogMagicB | TernlogMagicC; break;
-  case ISD::XOR:      Imm = TernlogMagicB ^ TernlogMagicC; break;
-  case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
-  }
-
-  switch (N->getOpcode()) {
-  default: llvm_unreachable("Unexpected opcode!");
-  case X86ISD::ANDNP:
-    if (A == N0)
-      Imm &= ~TernlogMagicA;
-    else
-      Imm = ~(Imm) & TernlogMagicA;
-    break;
-  case ISD::AND: Imm &= TernlogMagicA; break;
-  case ISD::OR:  Imm |= TernlogMagicA; break;
-  case ISD::XOR: Imm ^= TernlogMagicA; break;
-  }
+bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
+                                     SDNode *ParentBC, SDValue A, SDValue B,
+                                     SDValue C, uint8_t Imm) {
+  assert(A.isOperandOf(ParentA));
+  assert(B.isOperandOf(ParentBC));
+  assert(C.isOperandOf(ParentBC));
 
   auto tryFoldLoadOrBCast =
       [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
@@ -4028,10 +3964,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
 
   bool FoldedLoad = false;
   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
-  if (tryFoldLoadOrBCast(N, FoldableOp.getNode(), C, Tmp0, Tmp1, Tmp2, Tmp3,
-                         Tmp4)) {
+  if (tryFoldLoadOrBCast(Root, ParentBC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
     FoldedLoad = true;
-  } else if (tryFoldLoadOrBCast(N, N, A, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+  } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
+                                Tmp4)) {
     FoldedLoad = true;
     std::swap(A, C);
     // Swap bits 1/4 and 3/6.
@@ -4041,8 +3977,8 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
     if (OldImm & 0x10) Imm |= 0x02;
     if (OldImm & 0x08) Imm |= 0x40;
     if (OldImm & 0x40) Imm |= 0x08;
-  } else if (tryFoldLoadOrBCast(N, FoldableOp.getNode(), B, Tmp0, Tmp1, Tmp2,
-                                Tmp3, Tmp4)) {
+  } else if (tryFoldLoadOrBCast(Root, ParentBC, B, Tmp0, Tmp1, Tmp2, Tmp3,
+                                Tmp4)) {
     FoldedLoad = true;
     std::swap(B, C);
     // Swap bits 1/2 and 5/6.
@@ -4054,10 +3990,12 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
     if (OldImm & 0x40) Imm |= 0x20;
   }
 
-  SDLoc DL(N);
+  SDLoc DL(Root);
 
   SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);
 
+  MVT NVT = Root->getSimpleValueType(0);
+
   MachineSDNode *MNode;
   if (FoldedLoad) {
     SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
@@ -4111,11 +4049,87 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
     MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
   }
 
-  ReplaceUses(SDValue(N, 0), SDValue(MNode, 0));
-  CurDAG->RemoveDeadNode(N);
+  ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0));
+  CurDAG->RemoveDeadNode(Root);
   return true;
 }
 
+// Try to match two logic ops to a VPTERNLOG.
+// FIXME: Handle inverted inputs?
+// FIXME: Handle more complex patterns that use an operand more than once?
+bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
+  MVT NVT = N->getSimpleValueType(0);
+
+  // Make sure we support VPTERNLOG.
+  if (!NVT.isVector() || !Subtarget->hasAVX512() ||
+      NVT.getVectorElementType() == MVT::i1)
+    return false;
+
+  // We need VLX for 128/256-bit.
+  if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
+    return false;
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  auto getFoldableLogicOp = [](SDValue Op) {
+    // Peek through single use bitcast.
+    if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
+      Op = Op.getOperand(0);
+
+    if (!Op.hasOneUse())
+      return SDValue();
+
+    unsigned Opc = Op.getOpcode();
+    if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
+        Opc == X86ISD::ANDNP)
+      return Op;
+
+    return SDValue();
+  };
+
+  SDValue A, FoldableOp;
+  if ((FoldableOp = getFoldableLogicOp(N1))) {
+    A = N0;
+  } else if ((FoldableOp = getFoldableLogicOp(N0))) {
+    A = N1;
+  } else
+    return false;
+
+  SDValue B = FoldableOp.getOperand(0);
+  SDValue C = FoldableOp.getOperand(1);
+
+  // We can build the appropriate control immediate by performing the logic
+  // operation we're matching using these constants for A, B, and C.
+  const uint8_t TernlogMagicA = 0xf0;
+  const uint8_t TernlogMagicB = 0xcc;
+  const uint8_t TernlogMagicC = 0xaa;
+
+  uint8_t Imm;
+  switch (FoldableOp.getOpcode()) {
+  default: llvm_unreachable("Unexpected opcode!");
+  case ISD::AND:      Imm = TernlogMagicB & TernlogMagicC; break;
+  case ISD::OR:       Imm = TernlogMagicB | TernlogMagicC; break;
+  case ISD::XOR:      Imm = TernlogMagicB ^ TernlogMagicC; break;
+  case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
+  }
+
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("Unexpected opcode!");
+  case X86ISD::ANDNP:
+    if (A == N0)
+      Imm &= ~TernlogMagicA;
+    else
+      Imm = ~(Imm) & TernlogMagicA;
+    break;
+  case ISD::AND: Imm &= TernlogMagicA; break;
+  case ISD::OR:  Imm |= TernlogMagicA; break;
+  case ISD::XOR: Imm ^= TernlogMagicA; break;
+  }
+
+  return matchVPTERNLOG(N, N, FoldableOp.getNode(), A, B, C, Imm);
+}
+
 /// If the high bits of an 'and' operand are known zero, try setting the
 /// high bits of an 'and' constant operand to produce a smaller encoding by
 /// creating a small, sign-extended negative immediate rather than a large
@@ -4447,8 +4461,9 @@ bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
   SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
   SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
   ReplaceNode(N, Ternlog.getNode());
-  SelectCode(Ternlog.getNode());
-  return true;
+
+  return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
+                        A, B, C, 0xCA);
 }
 
 void X86DAGToDAGISel::Select(SDNode *Node) {
@@ -4598,6 +4613,14 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
       return;
     break;
 
+  case X86ISD::VPTERNLOG: {
+    uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
+    if (matchVPTERNLOG(Node, Node, Node, Node->getOperand(0),
+                       Node->getOperand(1), Node->getOperand(2), Imm))
+      return;
+    break;
+  }
+
   case X86ISD::ANDNP:
     if (tryVPTERNLOG(Node))
       return;

diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 639c9260fb0a..f9582238d30f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -11253,17 +11253,6 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
 
-  // Additional patterns for matching loads in other positions.
-  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
-                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
-            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
-                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(_.VT (OpNode _.RC:$src1,
-                          (bitconvert (_.LdFrag addr:$src3)),
-                          _.RC:$src2, (i8 timm:$src4))),
-            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
-                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
-
   // Additional patterns for matching zero masking with loads in other
   // positions.
   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
@@ -11312,17 +11301,6 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
 
-  // Additional patterns for matching broadcasts in other positions.
-  def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
-                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
-            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
-                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(_.VT (OpNode _.RC:$src1,
-                          (_.BroadcastLdFrag addr:$src3),
-                          _.RC:$src2, (i8 timm:$src4))),
-            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
-                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
-
   // Additional patterns for matching zero masking with broadcasts in other
   // positions.
   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
@@ -11394,398 +11372,6 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                         avx512vl_i64_info>, VEX_W;
 
-// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
-let Predicates = [HasVLX] in {
-  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
-                               timm:$src4)>;
-  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
-                                 (loadv16i8 addr:$src3), (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               timm:$src4)>;
-  def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
-                                 VR128X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
-                                 VR128X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
-                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
-                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR128X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
-                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
-                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR128X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
-                               timm:$src4)>;
-  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
-                                 (loadv8i16 addr:$src3), (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               timm:$src4)>;
-  def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
-                                 VR128X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
-                                 VR128X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
-                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
-                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR128X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
-                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
-                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR128X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
-                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
-                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR128X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
-                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
-                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR128X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
-                               timm:$src4)>;
-  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
-                                 (loadv32i8 addr:$src3), (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               timm:$src4)>;
-  def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
-                                 VR256X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
-                                 VR256X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
-                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
-                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR256X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
-                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
-                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR256X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
-                                  (i8 timm:$src4))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
-                               timm:$src4)>;
-  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
-                                  (loadv16i16 addr:$src3), (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               timm:$src4)>;
-  def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
-                                  VR256X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
-                                  VR256X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
-                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                  (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
-                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                  VR256X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
-                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                  (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
-                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                  VR256X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
-                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
-                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR256X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
-                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
-                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR256X:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-}
-
-let Predicates = [HasAVX512] in {
-  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
-                            timm:$src4)>;
-  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
-                                 (loadv64i8 addr:$src3), (i8 timm:$src4))),
-            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
-                            timm:$src4)>;
-  def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
-                                  VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
-                            (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
-                                 VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
-                            (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
-                                 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             timm:$src4)>;
-  def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v64i8 (X86vpternlog VR512:$src1,
-                                 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
-                                 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                 (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                                timm:$src4)>;
-  def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v64i8 (X86vpternlog VR512:$src1,
-                                 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                 VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                                (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
-                                  (i8 timm:$src4))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
-                            timm:$src4)>;
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
-                                  (loadv32i16 addr:$src3), (i8 timm:$src4))),
-            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
-                            timm:$src4)>;
-  def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
-                                  VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
-                            (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
-                                  VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
-                            (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
-                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                  (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             timm:$src4)>;
-  def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1,
-                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                  VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
-                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                  (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             timm:$src4)>;
-  def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1,
-                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                  VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
-                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                  (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             timm:$src4)>;
-  def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v32i16 (X86vpternlog VR512:$src1,
-                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                  VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
-                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                  (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             timm:$src4)>;
-  def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v16i32 (X86vpternlog VR512:$src1,
-                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
-                                  VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             (VPTERNLOG132_imm8 timm:$src4))>;
-
-  def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
-                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                  (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                             timm:$src4)>;
-  def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                                (VPTERNLOG321_imm8 timm:$src4))>;
-  def : Pat<(v8i64 (X86vpternlog VR512:$src1,
-                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
-                                  VR512:$src2, (i8 timm:$src4))),
-            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
-                               (VPTERNLOG132_imm8 timm:$src4))>;
-}
-
 // Patterns to implement vnot using vpternlog instead of creating all ones
 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
 // so that the result is only dependent on src0. But we use the same source


        


More information about the llvm-commits mailing list