[llvm-branch-commits] [llvm] b955c7e - [VE] VE Vector Predicated SDNode, vector add isel and tests

Simon Moll via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Nov 23 08:22:08 PST 2020


Author: Simon Moll
Date: 2020-11-23T17:17:07+01:00
New Revision: b955c7e63001068f7829827c327dc96ca9a05e8c

URL: https://github.com/llvm/llvm-project/commit/b955c7e63001068f7829827c327dc96ca9a05e8c
DIFF: https://github.com/llvm/llvm-project/commit/b955c7e63001068f7829827c327dc96ca9a05e8c.diff

LOG: [VE] VE Vector Predicated SDNode, vector add isel and tests

VE Vector Predicated (VVP) SDNodes form an intermediate layer between VE
vector instructions and the initial SDNodes.

We introduce 'vvp_add' with isel and tests as the first of these VVP
nodes. VVP nodes have a mask and explicit vector length operand, which
we will make proper use of later.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D91802

Added: 
    llvm/lib/Target/VE/VVPInstrInfo.td
    llvm/lib/Target/VE/VVPInstrPatternsVec.td
    llvm/lib/Target/VE/VVPNodes.def
    llvm/test/CodeGen/VE/Vector/vec_add.ll

Modified: 
    llvm/lib/Target/VE/VEISelLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.h
    llvm/lib/Target/VE/VEInstrInfo.td
    llvm/lib/Target/VE/VEInstrPatternsVec.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 30b6aa96edec..cc7f5f6800ec 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -254,8 +254,17 @@ void VETargetLowering::initSPUActions() {
 }
 
 void VETargetLowering::initVPUActions() {
-  for (MVT LegalVecVT : AllVectorVTs)
+  for (MVT LegalVecVT : AllVectorVTs) {
     setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
+    // Translate all vector instructions with legal element types to VVP_*
+    // nodes.
+    // TODO We will custom-widen into VVP_* nodes in the future. While we are
+    // building the infrastructure for this, we only do this for legal vector
+    // VTs.
+#define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
+  setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
+#include "VVPNodes.def"
+  }
 }
 
 SDValue
@@ -846,6 +855,10 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
     TARGET_NODE_CASE(VEC_BROADCAST)
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(GLOBAL_BASE_REG)
+
+    // Register the VVP_* SDNodes.
+#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
+#include "VVPNodes.def"
   }
 #undef TARGET_NODE_CASE
   return nullptr;
@@ -1403,6 +1416,10 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return lowerVASTART(Op, DAG);
   case ISD::VAARG:
     return lowerVAARG(Op, DAG);
+
+#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#include "VVPNodes.def"
+    return lowerToVVP(Op, DAG);
   }
 }
 /// } Custom Lower
@@ -1665,3 +1682,53 @@ bool VETargetLowering::hasAndNot(SDValue Y) const {
   // It's ok for generic registers.
   return true;
 }
+
+/// \returns the VVP_* SDNode opcode corresponding to \p OC.
+static Optional<unsigned> getVVPOpcode(unsigned OC) {
+  switch (OC) {
+#define ADD_VVP_OP(VVPNAME, SDNAME)                                            \
+  case VEISD::VVPNAME:                                                         \
+  case ISD::SDNAME:                                                            \
+    return VEISD::VVPNAME;
+#include "VVPNodes.def"
+  }
+  return None;
+}
+
+SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
+  // Can we represent this as a VVP node?
+  auto OCOpt = getVVPOpcode(Op->getOpcode());
+  if (!OCOpt.hasValue())
+    return SDValue();
+  unsigned VVPOC = OCOpt.getValue();
+
+  // The representative and legalized vector type of this operation.
+  EVT OpVecVT = Op.getValueType();
+  EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
+
+  // Materialize the VL parameter.
+  SDLoc DL(Op);
+  SDValue AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
+  MVT MaskVT = MVT::v256i1;
+  SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
+  SDValue Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
+                             ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+
+  // Categories we are interested in.
+  bool IsBinaryOp = false;
+
+  switch (VVPOC) {
+#define ADD_BINARY_VVP_OP(VVPNAME, ...)                                        \
+  case VEISD::VVPNAME:                                                         \
+    IsBinaryOp = true;                                                         \
+    break;
+#include "VVPNodes.def"
+  }
+
+  if (IsBinaryOp) {
+    assert(LegalVecVT.isSimple());
+    return DAG.getNode(VVPOC, DL, LegalVecVT, Op->getOperand(0),
+                       Op->getOperand(1), Mask, AVL);
+  }
+  llvm_unreachable("lowerToVVP called for unexpected SDNode.");
+}

diff  --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index e12bef882d8a..9924db647f46 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -39,6 +39,10 @@ enum NodeType : unsigned {
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
   GLOBAL_BASE_REG, // Global base reg for PIC.
+
+  // VVP_* nodes.
+#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
+#include "VVPNodes.def"
 };
 }
 
@@ -120,6 +124,10 @@ class VETargetLowering : public TargetLowering {
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
 
+  /// VVP Lowering {
+  SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
+  /// } VVPLowering
+
   /// Custom DAGCombine {
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 

diff  --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 863213f179ce..86635adf9ef2 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -245,6 +245,7 @@ def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000)
                                         == 0; }]>;
 def fplozero    : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff)
                                         == 0; }]>;
+def nonzero     : PatLeaf<(imm), [{ return N->getSExtValue() !=0 ; }]>;
 
 def CCSIOp : PatLeaf<(cond), [{
   switch (N->get()) {
@@ -2219,6 +2220,22 @@ def : Pat<(i32 (and i32:$val, 0xffff)),
 def : Pat<(i64 (and i64:$val, 0xffffffff)),
           (ANDrm $val, !add(32, 64))>;
 
+//===----------------------------------------------------------------------===//
+// Vector Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+// Custom intermediate ISDs.
+class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
+def vec_broadcast       : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
+                                 [SDTCisVec<0>, IsVLVT<2>]>>;
+
+// Whether this is an all-true mask (assuming undef-bits above VL are all-true).
+def true_mask           : PatLeaf<
+                            (vec_broadcast (i32 nonzero), (i32 srcvalue))>;
+// Match any broadcast (ignoring VL).
+def any_broadcast       : PatFrag<(ops node:$sx),
+                                  (vec_broadcast node:$sx, (i32 srcvalue))>;
+
 // Vector instructions.
 include "VEInstrVec.td"
 
@@ -2227,3 +2244,6 @@ include "VEInstrIntrinsicVL.td"
 
 // Patterns and intermediate SD nodes (VEC_*).
 include "VEInstrPatternsVec.td"
+
+// Patterns and intermediate SD nodes (VVP_*).
+include "VVPInstrPatternsVec.td"

diff  --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index 947b1ac8fb54..c08e7ba7bbe7 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -15,10 +15,6 @@
 // Instruction format superclass
 //===----------------------------------------------------------------------===//
 
-// Custom intermediate ISDs.
-class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
-def vec_broadcast   : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,  [SDTCisVec<0>, IsVLVT<2>]>>;
-
 multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, SDNodeXForm ImmCast, int SubRegIdx> {
   // VBRDil
   def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),

diff  --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
new file mode 100644
index 000000000000..81fbfe03b48f
--- /dev/null
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -0,0 +1,43 @@
+//===-------------- VVPInstrInfo.td - VVP_* SDNode patterns ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the VE Vector Predicated SDNodes (VVP SDNodes).  VVP
+// SDNodes are an intermediate isel layer between the vector SDNodes emitted by
+// LLVM and the actual VE vector instructions. For example:
+//
+//  ADD(x,y)   -->   VVP_ADD(x,y,mask,evl)   -->   VADDSWSXrvml(x,y,mask,evl)
+//     ^                      ^                            ^
+//  The standard     The VVP layer SDNode.        The VE vector instruction.
+//  SDNode.
+//
+// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is upstream.
+//===----------------------------------------------------------------------===//
+
+// Binary Operators {
+
+// BinaryOp(x,y,mask,vl)
+def SDTIntBinOpVVP : SDTypeProfile<1, 4, [     // vp_add, vp_and, etc.
+  SDTCisSameAs<0, 1>,
+  SDTCisSameAs<0, 2>,
+  SDTCisInt<0>,
+  SDTCisSameNumEltsAs<0, 3>,
+  IsVLVT<4>
+]>;
+
+// Binary operator commutative pattern.
+class vvp_commutative<SDNode RootOp> :
+  PatFrags<
+  (ops node:$lhs, node:$rhs, node:$mask, node:$vlen),
+  [(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
+   (RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
+
+// VVP node definitions.
+def vvp_add    : SDNode<"VEISD::VVP_ADD",  SDTIntBinOpVVP>;
+def c_vvp_add  : vvp_commutative<vvp_add>;
+
+// } Binary Operators

diff  --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
new file mode 100644
index 000000000000..2345173314a4
--- /dev/null
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -0,0 +1,68 @@
+//===----------- VVPInstrPatternsVec.td - VVP_* SDNode patterns -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes how VVP_* SDNodes are lowered to machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// VVP SDNode definitions.
+//
+//===----------------------------------------------------------------------===//
+include "VVPInstrInfo.td"
+
+multiclass VectorBinaryArith<
+    SDPatternOperator OpNode,
+    ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+    string OpBaseName,
+    SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+  // No mask.
+  def : Pat<(OpNode
+                (any_broadcast ScalarVT:$sx),
+                DataVT:$vy, (MaskVT true_mask), i32:$avl),
+            (!cast<Instruction>(OpBaseName#"rvl")
+                ScalarVT:$sx, $vy, $avl)>;
+  def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vvl")
+                $vx, $vy, $avl)>;
+
+  // Mask.
+  def : Pat<(OpNode
+                (any_broadcast ScalarVT:$sx),
+                DataVT:$vy, MaskVT:$mask, i32:$avl),
+            (!cast<Instruction>(OpBaseName#"rvml")
+                ScalarVT:$sx, $vy, $mask, $avl)>;
+  def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vvml")
+                $vx, $vy, $mask, $avl)>;
+
+  // TODO We do not specify patterns for the immediate variants here. There
+  // will be an immediate folding pass that takes care of switching to the
+  // immediate variant where applicable.
+
+  // TODO Fold vvp_select into passthru.
+}
+
+// Expand both 64bit and 32 bit variant (256 elements)
+multiclass VectorBinaryArith_ShortLong<
+    SDPatternOperator OpNode,
+    ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+    ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+  defm : VectorBinaryArith<OpNode,
+                           LongScalarVT, LongDataVT, v256i1,
+                           LongOpBaseName, simm7, LO7>;
+  defm : VectorBinaryArith<OpNode,
+                           ShortScalarVT, ShortDataVT, v256i1,
+                           ShortOpBaseName, simm7, LO7>;
+}
+
+
+defm : VectorBinaryArith_ShortLong<c_vvp_add,
+                                   i64, v256i64, "VADDSL",
+                                   i32, v256i32, "VADDSWSX">;

diff  --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
new file mode 100644
index 000000000000..4319b332388e
--- /dev/null
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -0,0 +1,32 @@
+//===-- VVPNodes.def - Lists & properties of VE Vector Predication Nodes --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all VVP_* SDNodes and their properties
+//
+//===----------------------------------------------------------------------===//
+
+/// ADD_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPNAME is a VVP SDNode operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
+#ifndef ADD_VVP_OP
+#define ADD_VVP_OP(X, Y)
+#endif
+
+/// ADD_BINARY_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPNAME is a VVP binary operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
+#ifndef ADD_BINARY_VVP_OP
+#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y)
+#endif
+
+// Integer arithmetic.
+ADD_BINARY_VVP_OP(VVP_ADD,ADD)
+
+
+#undef ADD_BINARY_VVP_OP
+#undef ADD_VVP_OP

diff  --git a/llvm/test/CodeGen/VE/Vector/vec_add.ll b/llvm/test/CodeGen/VE/Vector/vec_add.ll
new file mode 100644
index 000000000000..74421332daab
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_add.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+; <256 x i32>
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @add_vv_v256i32(<256 x i32> %x, <256 x i32> %y) {
+; CHECK-LABEL: add_vv_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vadds.w.sx %v0, %v0, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %z = add <256 x i32> %x, %y
+  ret <256 x i32> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @add_sv_v256i32(i32 %x, <256 x i32> %y) {
+; CHECK-LABEL: add_sv_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vadds.w.sx %v0, %s0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %x, i32 0
+  %vx = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i32> %vx, %y
+  ret <256 x i32> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @add_vs_v256i32(<256 x i32> %x, i32 %y) {
+; CHECK-LABEL: add_vs_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vadds.w.sx %v0, %s0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %yins = insertelement <256 x i32> undef, i32 %y, i32 0
+  %vy = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i32> %x, %vy
+  ret <256 x i32> %z
+}
+
+
+
+; <256 x i64>
+
+; Function Attrs: nounwind
+define fastcc <256 x i64> @add_vv_v256i64(<256 x i64> %x, <256 x i64> %y) {
+; CHECK-LABEL: add_vv_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vadds.l %v0, %v0, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %z = add <256 x i64> %x, %y
+  ret <256 x i64> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i64> @add_sv_v256i64(i64 %x, <256 x i64> %y) {
+; CHECK-LABEL: add_sv_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vadds.l %v0, %s0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %x, i32 0
+  %vx = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i64> %vx, %y
+  ret <256 x i64> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i64> @add_vs_v256i64(<256 x i64> %x, i64 %y) {
+; CHECK-LABEL: add_vs_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vadds.l %v0, %s0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %yins = insertelement <256 x i64> undef, i64 %y, i32 0
+  %vy = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i64> %x, %vy
+  ret <256 x i64> %z
+}
+
+; <128 x i64>
+; We expect this to be widened.
+
+; Function Attrs: nounwind
+define fastcc <128 x i64> @add_vv_v128i64(<128 x i64> %x, <128 x i64> %y) {
+; CHECK-LABEL: add_vv_v128i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vadds.l %v0, %v0, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %z = add <128 x i64> %x, %y
+  ret <128 x i64> %z
+}
+
+; <256 x i16>
+; We expect promotion.
+
+; Function Attrs: nounwind
+define fastcc <256 x i16> @add_vv_v256i16(<256 x i16> %x, <256 x i16> %y) {
+; CHECK-LABEL: add_vv_v256i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vadds.w.sx %v0, %v0, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %z = add <256 x i16> %x, %y
+  ret <256 x i16> %z
+}
+
+; <128 x i16>
+; We expect this to be scalarized (for now).
+
+; Function Attrs: nounwind
+define fastcc <128 x i16> @add_vv_v128i16(<128 x i16> %x, <128 x i16> %y) {
+; CHECK-LABEL: add_vv_v128i16:
+; CHECK: # %bb.0:
+; CHECK-NOT: vadd
+  %z = add <128 x i16> %x, %y
+  ret <128 x i16> %z
+}
+


        


More information about the llvm-branch-commits mailing list