[llvm] cf964eb - [VE] v512i1 mask arithmetic isel
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 21 01:39:24 PST 2022
Author: Simon Moll
Date: 2022-02-21T10:38:11+01:00
New Revision: cf964eb5bd666c870cc21963fc5bf017699c29d7
URL: https://github.com/llvm/llvm-project/commit/cf964eb5bd666c870cc21963fc5bf017699c29d7
DIFF: https://github.com/llvm/llvm-project/commit/cf964eb5bd666c870cc21963fc5bf017699c29d7.diff
LOG: [VE] v512i1 mask arithmetic isel
Packed vector and mask registers (v512) are composed of two v256
subregisters that occupy the even and odd element positions. We add
packing support SDNodes (vec_unpack_lo|hi and vec_pack) and splitting of
v512i1 mask arithmetic ops with those.
Reviewed By: kaz7
Differential Revision: https://reviews.llvm.org/D120053
Added:
llvm/test/CodeGen/VE/Packed/mask_binary.ll
Modified:
llvm/lib/Target/VE/VECustomDAG.cpp
llvm/lib/Target/VE/VECustomDAG.h
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrInfo.td
llvm/lib/Target/VE/VEInstrPatternsVec.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
index 7b12bb898c391..d605cdcc7ee15 100644
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -41,6 +41,17 @@ bool isMaskType(EVT SomeVT) {
return SomeVT.getVectorElementType() == MVT::i1;
}
+bool isMaskArithmetic(SDValue Op) {
+ switch (Op.getOpcode()) {
+ default:
+ return false;
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR:
+ return isMaskType(Op.getValueType());
+ }
+}
+
/// \returns the VVP_* SDNode opcode corresponsing to \p OC.
Optional<unsigned> getVVPOpcode(unsigned Opcode) {
switch (Opcode) {
@@ -206,4 +217,18 @@ SDValue VECustomDAG::annotateLegalAVL(SDValue AVL) const {
return getNode(VEISD::LEGALAVL, AVL.getValueType(), AVL);
}
+SDValue VECustomDAG::getUnpack(EVT DestVT, SDValue Vec, PackElem Part,
+ SDValue AVL) {
+ // TODO: Peek through VEC_PACK and VEC_BROADCAST(REPL_<sth> ..) operands.
+ unsigned OC =
+ (Part == PackElem::Lo) ? VEISD::VEC_UNPACK_LO : VEISD::VEC_UNPACK_HI;
+ return DAG.getNode(OC, DL, DestVT, Vec, AVL);
+}
+
+SDValue VECustomDAG::getPack(EVT DestVT, SDValue LoVec, SDValue HiVec,
+ SDValue AVL) {
+ // TODO: Peek through VEC_UNPACK_LO|HI operands.
+ return DAG.getNode(VEISD::VEC_PACK, DL, DestVT, LoVec, HiVec, AVL);
+}
+
} // namespace llvm
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
index ff57645b4d11a..4adceef341f48 100644
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -29,6 +29,8 @@ bool isPackedVectorType(EVT SomeVT);
bool isMaskType(EVT SomeVT);
+bool isMaskArithmetic(SDValue Op);
+
bool isVVPOrVEC(unsigned);
bool maySafelyIgnoreMask(SDValue Op);
@@ -86,6 +88,11 @@ MVT getLegalVectorType(Packing P, MVT ElemVT);
// Whether this type belongs to a packed mask or vector register.
Packing getTypePacking(EVT);
+enum class PackElem : int8_t {
+ Lo = 0, // Integer (63, 32]
+ Hi = 1 // Float (32, 0]
+};
+
class VECustomDAG {
SelectionDAG &DAG;
SDLoc DL;
@@ -127,6 +134,11 @@ class VECustomDAG {
SDValue getUNDEF(EVT VT) const { return DAG.getUNDEF(VT); }
/// } getNode
+ /// Packing {
+ SDValue getUnpack(EVT DestVT, SDValue Vec, PackElem Part, SDValue AVL);
+ SDValue getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, SDValue AVL);
+ /// } Packing
+
SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
bool IsOpaque = false) const;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 0585bfa9ae78c..38182dca7ba76 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -299,6 +299,9 @@ void VETargetLowering::initVPUActions() {
for (MVT LegalMaskVT : AllMaskVTs)
setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
+ for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
+ setOperationAction(Opc, MVT::v512i1, Custom);
+
for (MVT LegalVecVT : AllVectorVTs) {
setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
@@ -903,6 +906,9 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(MEMBARRIER)
TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(TS1AM)
+ TARGET_NODE_CASE(VEC_UNPACK_LO)
+ TARGET_NODE_CASE(VEC_UNPACK_HI)
+ TARGET_NODE_CASE(VEC_PACK)
TARGET_NODE_CASE(VEC_BROADCAST)
TARGET_NODE_CASE(REPL_I32)
TARGET_NODE_CASE(REPL_F32)
@@ -1746,6 +1752,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
// Translate into a VEC_*/VVP_* layer operation.
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
+ if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
+ return splitMaskArithmetic(Op, DAG);
return lowerToVVP(Op, DAG);
}
}
@@ -2690,6 +2698,23 @@ bool VETargetLowering::hasAndNot(SDValue Y) const {
return true;
}
+SDValue VETargetLowering::splitMaskArithmetic(SDValue Op,
+ SelectionDAG &DAG) const {
+ VECustomDAG CDAG(DAG, Op);
+ SDValue AVL =
+ CDAG.getConstant(Op.getValueType().getVectorNumElements(), MVT::i32);
+ SDValue A = Op->getOperand(0);
+ SDValue B = Op->getOperand(1);
+ SDValue LoA = CDAG.getUnpack(MVT::v256i1, A, PackElem::Lo, AVL);
+ SDValue HiA = CDAG.getUnpack(MVT::v256i1, A, PackElem::Hi, AVL);
+ SDValue LoB = CDAG.getUnpack(MVT::v256i1, B, PackElem::Lo, AVL);
+ SDValue HiB = CDAG.getUnpack(MVT::v256i1, B, PackElem::Hi, AVL);
+ unsigned Opc = Op.getOpcode();
+ auto LoRes = CDAG.getNode(Opc, MVT::v256i1, {LoA, LoB});
+ auto HiRes = CDAG.getNode(Opc, MVT::v256i1, {HiA, HiB});
+ return CDAG.getPack(MVT::v512i1, LoRes, HiRes, AVL);
+}
+
SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
// Can we represent this as a VVP node.
const unsigned Opcode = Op->getOpcode();
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 30d1faa7495d8..604f34fa2086a 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -38,8 +38,14 @@ enum NodeType : unsigned {
MEMBARRIER, // Compiler barrier only; generate a no-op.
RET_FLAG, // Return with a flag operand.
TS1AM, // A TS1AM instruction used for 1/2 bytes swap.
- VEC_BROADCAST, // A vector broadcast instruction.
- // 0: scalar value, 1: VL
+ VEC_UNPACK_LO, // unpack the lo v256 slice of a packed v512 vector.
+ VEC_UNPACK_HI, // unpack the hi v256 slice of a packed v512 vector.
+ // 0: v512 vector, 1: AVL
+ VEC_PACK, // pack a lo and a hi vector into one v512 vector
+ // 0: v256 lo vector, 1: v256 hi vector, 2: AVL
+
+ VEC_BROADCAST, // A vector broadcast instruction.
+ // 0: scalar value, 1: VL
REPL_I32,
REPL_F32, // Replicate subregister to other half.
@@ -182,6 +188,7 @@ class VETargetLowering : public TargetLowering {
SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const;
+ SDValue splitMaskArithmetic(SDValue Op, SelectionDAG &DAG) const;
/// } VVPLowering
/// Custom DAGCombine {
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 717427c3f48da..dc9fa4352170b 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -2293,6 +2293,18 @@ class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
[SDTCisVec<0>, IsVLVT<2>]>>;
+///// Packed mode Support /////
+// unpack the lo part of this vector
+def vec_unpack_lo : SDNode<"VEISD::VEC_UNPACK_LO", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisVec<1>, IsVLVT<2>]>>;
+// unpack the hi part of this vector
+def vec_unpack_hi : SDNode<"VEISD::VEC_UNPACK_HI", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisVec<1>, IsVLVT<2>]>>;
+// re-pack v256i32, v256f32 back into one v512.32
+def vec_pack : SDNode<"VEISD::VEC_PACK", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
+ SDTCisSameNumEltsAs<1,2>, IsVLVT<3>]>>;
+
// replicate lower 32bit to upper 32bit (f32 scalar replication).
def repl_f32 : SDNode<"VEISD::REPL_F32",
SDTypeProfile<1, 1,
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index f33c4ac0fb42a..e17b418201c65 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -112,3 +112,16 @@ class Mask_Binary<ValueType MaskVT, SDPatternOperator MaskOp, string InstName> :
def: Mask_Binary<v256i1, and, "ANDM">;
def: Mask_Binary<v256i1, or, "ORM">;
def: Mask_Binary<v256i1, xor, "XORM">;
+
+///// Packing support /////
+
+// v256i1 <> v512i1
+def : Pat<(v256i1 (vec_unpack_lo v512i1:$vm, (i32 srcvalue))),
+ (EXTRACT_SUBREG $vm, sub_vm_odd)>;
+def : Pat<(v256i1 (vec_unpack_hi v512i1:$vm, (i32 srcvalue))),
+ (EXTRACT_SUBREG $vm, sub_vm_even)>;
+def : Pat<(v512i1 (vec_pack v256i1:$vlo, v256i1:$vhi, (i32 srcvalue))),
+ (INSERT_SUBREG (INSERT_SUBREG
+ (v512i1 (IMPLICIT_DEF)),
+ $vlo, sub_vm_odd),
+ $vhi, sub_vm_even)>;
diff --git a/llvm/test/CodeGen/VE/Packed/mask_binary.ll b/llvm/test/CodeGen/VE/Packed/mask_binary.ll
new file mode 100644
index 0000000000000..d6b2d7fdcfe15
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/mask_binary.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+; Function Attrs: nounwind
+define fastcc <512 x i1> @and_mm_v512i1(<512 x i1> %x, <512 x i1> %y) {
+; CHECK-LABEL: and_mm_v512i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andm %vm6, %vm2, %vm4
+; CHECK-NEXT: andm %vm7, %vm3, %vm5
+; CHECK-NEXT: andm %vm2, %vm0, %vm6
+; CHECK-NEXT: andm %vm3, %vm0, %vm7
+; CHECK-NEXT: b.l.t (, %s10)
+ %z = and <512 x i1> %x, %y
+ ret <512 x i1> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <512 x i1> @or_mm_v512i1(<512 x i1> %x, <512 x i1> %y) {
+; CHECK-LABEL: or_mm_v512i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orm %vm6, %vm2, %vm4
+; CHECK-NEXT: orm %vm7, %vm3, %vm5
+; CHECK-NEXT: andm %vm2, %vm0, %vm6
+; CHECK-NEXT: andm %vm3, %vm0, %vm7
+; CHECK-NEXT: b.l.t (, %s10)
+ %z = or <512 x i1> %x, %y
+ ret <512 x i1> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <512 x i1> @xor_mm_v512i1(<512 x i1> %x, <512 x i1> %y) {
+; CHECK-LABEL: xor_mm_v512i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorm %vm6, %vm2, %vm4
+; CHECK-NEXT: xorm %vm7, %vm3, %vm5
+; CHECK-NEXT: andm %vm2, %vm0, %vm6
+; CHECK-NEXT: andm %vm3, %vm0, %vm7
+; CHECK-NEXT: b.l.t (, %s10)
+ %z = xor <512 x i1> %x, %y
+ ret <512 x i1> %z
+}
+
More information about the llvm-commits
mailing list