[llvm] 53efbc1 - [VE] v256i1 broadcast isel and tests

Tue Feb 15 03:42:00 PST 2022

Author: Simon Moll
Date: 2022-02-15T12:40:51+01:00
New Revision: 53efbc15cb8e2b98bbd02ff39765561d2426b111

URL: https://github.com/llvm/llvm-project/commit/53efbc15cb8e2b98bbd02ff39765561d2426b111
DIFF: https://github.com/llvm/llvm-project/commit/53efbc15cb8e2b98bbd02ff39765561d2426b111.diff

LOG: [VE] v256i1 broadcast isel and tests

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D119241

Added: 
    llvm/test/CodeGen/VE/Vector/mask_broadcast.ll

Modified: 
    llvm/lib/Target/VE/VE.h
    llvm/lib/Target/VE/VECustomDAG.cpp
    llvm/lib/Target/VE/VECustomDAG.h
    llvm/lib/Target/VE/VEISelDAGToDAG.cpp
    llvm/lib/Target/VE/VEISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
index 2a729a1a311c3..d57a521b3e2db 100644

--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -370,5 +370,8 @@ inline static uint64_t mimm2Val(uint64_t Val) {
 inline unsigned M0(unsigned Val) { return Val + 64; }
 inline unsigned M1(unsigned Val) { return Val; }
 
+static const unsigned StandardVectorWidth = 256; // Element count of a standard-mode (Packing::Normal) vector.
+static const unsigned PackedVectorWidth = 512; // Element count of a packed-mode (Packing::Dense) vector.
+
 } // namespace llvm
 #endif

diff  --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
index 0513cca286588..7b12bb898c391 100644
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -19,14 +19,28 @@
 
 namespace llvm {
 
-static const int StandardVectorWidth = 256;
-
 bool isPackedVectorType(EVT SomeVT) {
   if (!SomeVT.isVector())
     return false;
   return SomeVT.getVectorNumElements() > StandardVectorWidth;
 }
 
+MVT getLegalVectorType(Packing P, MVT ElemVT) { // Vector type of ElemVT whose element count is chosen by packing P.
+  return MVT::getVectorVT(ElemVT, P == Packing::Normal ? StandardVectorWidth // Normal: StandardVectorWidth elements.
+                                                       : PackedVectorWidth); // Any other packing: PackedVectorWidth elements.
+}
+
+Packing getTypePacking(EVT VT) { // Classify vector type VT as Packing::Dense or Packing::Normal.
+  assert(VT.isVector()); // Only vector types may be passed in.
+  return isPackedVectorType(VT) ? Packing::Dense : Packing::Normal; // Dense iff VT has more than StandardVectorWidth elements.
+}
+
+bool isMaskType(EVT SomeVT) { // True iff SomeVT is a vector whose element type is i1.
+  if (!SomeVT.isVector()) // Non-vector types are never mask types.
+    return false;
+  return SomeVT.getVectorElementType() == MVT::i1;
+}
+
 /// \returns the VVP_* SDNode opcode corresponsing to \p OC.
 Optional<unsigned> getVVPOpcode(unsigned Opcode) {
   switch (Opcode) {
@@ -121,11 +135,55 @@ SDValue VECustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget,
   return DAG.getConstant(Val, DL, VT, IsTarget, IsOpaque);
 }
 
+SDValue VECustomDAG::getConstantMask(Packing Packing, bool AllTrue) const { // Build a constant i1 mask vector (all-true or its negation) for the given packing.
+  auto MaskVT = getLegalVectorType(Packing, MVT::i1); // i1 vector type sized for this packing.
+
+  // VEISelDAGToDAG will replace this pattern with the constant-true VM.
+  auto TrueVal = DAG.getConstant(-1, DL, MVT::i32); // Non-zero i32 scalar used as the splat value.
+  auto AVL = getConstant(MaskVT.getVectorNumElements(), MVT::i32); // AVL operand is the full element count of the mask type.
+  auto Res = getNode(VEISD::VEC_BROADCAST, MaskVT, {TrueVal, AVL});
+  if (AllTrue)
+    return Res;
+
+  return DAG.getNOT(DL, Res, Res.getValueType()); // The all-false mask is expressed as NOT of the all-true broadcast.
+}
+
+SDValue VECustomDAG::getMaskBroadcast(EVT ResultVT, SDValue Scalar,
+                                      SDValue AVL) const { // Splat Scalar into an i1 mask vector with ResultVT's element count.
+  // Constant mask splat.
+  if (auto BcConst = dyn_cast<ConstantSDNode>(Scalar)) // Constant scalars are handed to getConstantMask.
+    return getConstantMask(getTypePacking(ResultVT),
+                           BcConst->getSExtValue() != 0); // Any non-zero constant selects the all-true mask.
+
+  // Expand the broadcast to a vector comparison.
+  auto ScalarBoolVT = Scalar.getSimpleValueType();
+  assert(ScalarBoolVT == MVT::i32); // Non-constant scalars must already be i32 here.
+
+  // Cast to i32 ty.
+  SDValue CmpElem = DAG.getSExtOrTrunc(Scalar, DL, MVT::i32); // NOTE(review): looks redundant given the i32 assert above - confirm.
+  unsigned ElemCount = ResultVT.getVectorNumElements();
+  MVT CmpVecTy = MVT::getVectorVT(ScalarBoolVT, ElemCount); // Vector of the scalar's type, one element per mask element.
+
+  // Broadcast to vector.
+  SDValue BCVec =
+      DAG.getNode(VEISD::VEC_BROADCAST, DL, CmpVecTy, {CmpElem, AVL});
+  SDValue ZeroVec =
+      getBroadcast(CmpVecTy, {DAG.getConstant(0, DL, ScalarBoolVT)}, AVL); // Zero splat of the same type, used as the comparison operand.
+
+  MVT BoolVecTy = MVT::getVectorVT(MVT::i1, ElemCount);
+
+  // Broadcast(Data) != Broadcast(0)
+  // TODO: Use a VVP operation for this.
+  return DAG.getSetCC(DL, BoolVecTy, BCVec, ZeroVec, ISD::CondCode::SETNE); // Element-wise BCVec != ZeroVec, producing the i1 vector.
+}
+
 SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar,
                                   SDValue AVL) const {
   assert(ResultVT.isVector());
   auto ScaVT = Scalar.getValueType();
-  assert(ScaVT != MVT::i1 && "TODO: Mask broadcasts");
+
+  if (isMaskType(ResultVT))
+    return getMaskBroadcast(ResultVT, Scalar, AVL);
 
   if (isPackedVectorType(ResultVT)) {
     // v512x packed mode broadcast

diff  --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
index 32c349526b47c..ff57645b4d11a 100644
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -27,6 +27,8 @@ bool isVVPBinaryOp(unsigned Opcode);
 
 bool isPackedVectorType(EVT SomeVT);
 
+bool isMaskType(EVT SomeVT);
+
 bool isVVPOrVEC(unsigned);
 
 bool maySafelyIgnoreMask(SDValue Op);
@@ -73,6 +75,17 @@ std::pair<SDValue, bool> getAnnotatedNodeAVL(SDValue);
 
 /// } AVL Functions
 
+enum class Packing { // Element-count mode of a vector or mask type.
+  Normal = 0, // 256 element standard mode.
+  Dense = 1   // 512 element packed mode.
+};
+
+// Get the vector or mask register type for this packing and element type.
+MVT getLegalVectorType(Packing P, MVT ElemVT);
+
+// The packing (Normal or Dense) of this vector or mask type.
+Packing getTypePacking(EVT);
+
 class VECustomDAG {
   SelectionDAG &DAG;
   SDLoc DL;
@@ -117,6 +130,8 @@ class VECustomDAG {
   SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
                       bool IsOpaque = false) const;
 
+  SDValue getConstantMask(Packing Packing, bool AllTrue) const;
+  SDValue getMaskBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const;
   SDValue getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const;
 
   // Wrap AVL in a LEGALAVL node (unless it is one already).

diff  --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index f8ec70dcbbf79..a4319ec1c975a 100644
--- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "VE.h"
 #include "VETargetMachine.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
@@ -341,6 +342,36 @@ void VEDAGToDAGISel::Select(SDNode *N) {
     ReplaceNode(N, N->getOperand(0).getNode());
     return;
 
+  // Lower a constant non-zero i1 broadcast to VM0 (v256i1) or VMP0 (v512i1).
+  case VEISD::VEC_BROADCAST: {
+    MVT SplatResTy = N->getSimpleValueType(0);
+    if (SplatResTy.getVectorElementType() != MVT::i1)
+      break;
+
+    // Constant non-zero broadcast.
+    auto BConst = dyn_cast<ConstantSDNode>(N->getOperand(0));
+    if (!BConst)
+      break;
+    bool BCTrueMask = (BConst->getSExtValue() != 0);
+    if (!BCTrueMask)
+      break;
+
+    // Packed or non-packed.
+    SDValue New;
+    if (SplatResTy.getVectorNumElements() == StandardVectorWidth) {
+      New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VE::VM0,
+                                   MVT::v256i1);
+    } else if (SplatResTy.getVectorNumElements() == PackedVectorWidth) {
+      New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VE::VMP0,
+                                   MVT::v512i1);
+    } else
+      break;
+
+    // Replace.
+    ReplaceNode(N, New.getNode());
+    return;
+  }
+
   case VEISD::GLOBAL_BASE_REG:
     ReplaceNode(N, getGlobalBaseReg());
     return;

diff  --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 51f710b0f13f9..b55ad83e3dcf1 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -76,6 +76,8 @@ bool VETargetLowering::CanLowerReturn(
 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
                                    MVT::v256f32, MVT::v512f32, MVT::v256f64};
 
+static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1}; // i1 vector types given custom BUILD_VECTOR lowering in initVPUActions.
+
 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
 
 void VETargetLowering::initRegisterClasses() {
@@ -294,6 +296,9 @@ void VETargetLowering::initSPUActions() {
 }
 
 void VETargetLowering::initVPUActions() {
+  for (MVT LegalMaskVT : AllMaskVTs)
+    setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
+
   for (MVT LegalVecVT : AllVectorVTs) {
     setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
@@ -1661,7 +1666,7 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
   if (SDValue ScalarV = getSplatValue(Op.getNode())) {
     unsigned NumEls = ResultVT.getVectorNumElements();
     auto AVL = CDAG.getConstant(NumEls, MVT::i32);
-    return CDAG.getBroadcast(ResultVT, Op.getOperand(0), AVL);
+    return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
   }
 
   // Expand
@@ -2696,9 +2701,9 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
 
   // The representative and legalized vector type of this operation.
   VECustomDAG CDAG(DAG, Op);
-  MVT MaskVT = MVT::v256i1; // TODO: packed mode.
   EVT OpVecVT = Op.getValueType();
   EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
+  auto Packing = getTypePacking(LegalVecVT.getSimpleVT());
 
   SDValue AVL;
   SDValue Mask;
@@ -2713,8 +2718,7 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
   } else {
     // Materialize the VL parameter.
     AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32);
-    SDValue ConstTrue = CDAG.getConstant(1, MVT::i32);
-    Mask = CDAG.getBroadcast(MaskVT, ConstTrue, AVL);
+    Mask = CDAG.getConstantMask(Packing, true);
   }
 
   if (isVVPBinaryOp(VVPOpcode)) {

diff  --git a/llvm/test/CodeGen/VE/Vector/mask_broadcast.ll b/llvm/test/CodeGen/VE/Vector/mask_broadcast.ll
new file mode 100644
index 0000000000000..a0f0a656e6047
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/mask_broadcast.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+define fastcc <256 x i1> @brd_v256i1_s(i1 %s) {
+; CHECK-LABEL: brd_v256i1_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    vbrd %v1, 0
+; CHECK-NEXT:    vcmpu.w %v0, %v0, %v1
+; CHECK-NEXT:    vfmk.w.ne %vm1, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i1> undef, i1 %s, i32 0
+  %ret = shufflevector <256 x i1> %val, <256 x i1> undef, <256 x i32> zeroinitializer
+  ret <256 x i1> %ret
+}
+
+define fastcc <256 x i1> @brd_v256i1_zero() {
+; CHECK-LABEL: brd_v256i1_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorm %vm1, %vm0, %vm0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i1> undef, i1 0, i32 0
+  %ret = shufflevector <256 x i1> %val, <256 x i1> undef, <256 x i32> zeroinitializer
+  ret <256 x i1> %ret
+}
+
+define fastcc <256 x i1> @brd_v256i1_one() {
+; CHECK-LABEL: brd_v256i1_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andm %vm1, %vm0, %vm0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i1> undef, i1 1, i32 0
+  %ret = shufflevector <256 x i1> %val, <256 x i1> undef, <256 x i32> zeroinitializer
+  ret <256 x i1> %ret
+}