[llvm] 4fd7712 - [VE] Split unsupported v512.32 ops
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 22 05:30:57 PST 2022
Author: Simon Moll
Date: 2022-02-22T14:29:41+01:00
New Revision: 4fd77129f2deb4f84e39b3c3e11095522cae542f
URL: https://github.com/llvm/llvm-project/commit/4fd77129f2deb4f84e39b3c3e11095522cae542f
DIFF: https://github.com/llvm/llvm-project/commit/4fd77129f2deb4f84e39b3c3e11095522cae542f.diff
LOG: [VE] Split unsupported v512.32 ops
Split v512.32 binary ops into two v256.32 ops using packing support
opcodes (vec_unpack_lo|hi, vec_pack).
Depends on D120053 for packing opcodes.
Reviewed By: kaz7
Differential Revision: https://reviews.llvm.org/D120146
Added:
llvm/test/CodeGen/VE/Packed/vp_fdiv.ll
llvm/test/CodeGen/VE/Packed/vp_mul.ll
llvm/test/CodeGen/VE/Packed/vp_sdiv.ll
llvm/test/CodeGen/VE/Packed/vp_udiv.ll
Modified:
llvm/lib/Target/VE/VECustomDAG.cpp
llvm/lib/Target/VE/VECustomDAG.h
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrPatternsVec.td
llvm/lib/Target/VE/VVPISelLowering.cpp
llvm/lib/Target/VE/VVPNodes.def
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
index d605cdcc7ee15..ed463fe624ad0 100644
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -25,6 +25,12 @@ bool isPackedVectorType(EVT SomeVT) {
return SomeVT.getVectorNumElements() > StandardVectorWidth;
}
+MVT splitVectorType(MVT VT) {
+ if (!VT.isVector())
+ return VT;
+ return MVT::getVectorVT(VT.getVectorElementType(), StandardVectorWidth);
+}
+
MVT getLegalVectorType(Packing P, MVT ElemVT) {
return MVT::getVectorVT(ElemVT, P == Packing::Normal ? StandardVectorWidth
: PackedVectorWidth);
@@ -83,6 +89,31 @@ bool maySafelyIgnoreMask(SDValue Op) {
}
}
+bool supportsPackedMode(unsigned Opcode, EVT IdiomVT) {
+ bool IsPackedOp = isPackedVectorType(IdiomVT);
+ bool IsMaskOp = isMaskType(IdiomVT);
+ switch (Opcode) {
+ default:
+ return false;
+
+ case VEISD::VEC_BROADCAST:
+ return true;
+#define REGISTER_PACKED(VVP_NAME) case VEISD::VVP_NAME:
+#include "VVPNodes.def"
+ return IsPackedOp && !IsMaskOp;
+ }
+}
+
+bool isPackingSupportOpcode(unsigned Opc) {
+ switch (Opc) {
+ case VEISD::VEC_PACK:
+ case VEISD::VEC_UNPACK_LO:
+ case VEISD::VEC_UNPACK_HI:
+ return true;
+ }
+ return false;
+}
+
bool isVVPOrVEC(unsigned Opcode) {
switch (Opcode) {
case VEISD::VEC_BROADCAST:
@@ -125,6 +156,25 @@ Optional<int> getAVLPos(unsigned Opc) {
return None;
}
+Optional<int> getMaskPos(unsigned Opc) {
+ // This is only available for VP SDNodes
+ auto PosOpt = ISD::getVPMaskIdx(Opc);
+ if (PosOpt)
+ return *PosOpt;
+
+ // VVP Opcodes.
+ if (isVVPBinaryOp(Opc))
+ return 2;
+
+ // VM Opcodes.
+ switch (Opc) {
+ case VEISD::VVP_SELECT:
+ return 2;
+ }
+
+ return None;
+}
+
bool isLegalAVL(SDValue AVL) { return AVL->getOpcode() == VEISD::LEGALAVL; }
SDValue getNodeAVL(SDValue Op) {
@@ -132,6 +182,11 @@ SDValue getNodeAVL(SDValue Op) {
return PosOpt ? Op->getOperand(*PosOpt) : SDValue();
}
+SDValue getNodeMask(SDValue Op) {
+ auto PosOpt = getMaskPos(Op->getOpcode());
+ return PosOpt ? Op->getOperand(*PosOpt) : SDValue();
+}
+
std::pair<SDValue, bool> getAnnotatedNodeAVL(SDValue Op) {
SDValue AVL = getNodeAVL(Op);
if (!AVL)
@@ -218,7 +273,9 @@ SDValue VECustomDAG::annotateLegalAVL(SDValue AVL) const {
}
SDValue VECustomDAG::getUnpack(EVT DestVT, SDValue Vec, PackElem Part,
- SDValue AVL) {
+ SDValue AVL) const {
+ assert(getAnnotatedNodeAVL(AVL).second && "Expected a pack-legalized AVL");
+
// TODO: Peek through VEC_PACK and VEC_BROADCAST(REPL_<sth> ..) operands.
unsigned OC =
(Part == PackElem::Lo) ? VEISD::VEC_UNPACK_LO : VEISD::VEC_UNPACK_HI;
@@ -226,9 +283,34 @@ SDValue VECustomDAG::getUnpack(EVT DestVT, SDValue Vec, PackElem Part,
}
SDValue VECustomDAG::getPack(EVT DestVT, SDValue LoVec, SDValue HiVec,
- SDValue AVL) {
+ SDValue AVL) const {
+ assert(getAnnotatedNodeAVL(AVL).second && "Expected a pack-legalized AVL");
+
// TODO: Peek through VEC_UNPACK_LO|HI operands.
return DAG.getNode(VEISD::VEC_PACK, DL, DestVT, LoVec, HiVec, AVL);
}
+VETargetMasks VECustomDAG::getTargetSplitMask(SDValue RawMask, SDValue RawAVL,
+ PackElem Part) const {
+ // Adjust AVL for this part
+ SDValue NewAVL;
+ SDValue OneV = getConstant(1, MVT::i32);
+ if (Part == PackElem::Hi)
+ NewAVL = getNode(ISD::ADD, MVT::i32, {RawAVL, OneV});
+ else
+ NewAVL = RawAVL;
+ NewAVL = getNode(ISD::SRL, MVT::i32, {NewAVL, OneV});
+
+ NewAVL = annotateLegalAVL(NewAVL);
+
+ // Legalize Mask (unpack or all-true)
+ SDValue NewMask;
+ if (!RawMask)
+ NewMask = getConstantMask(Packing::Normal, true);
+ else
+ NewMask = getUnpack(MVT::v256i1, RawMask, Part, NewAVL);
+
+ return VETargetMasks(NewMask, NewAVL);
+}
+
} // namespace llvm
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
index 4adceef341f48..6553b90a2b69b 100644
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -25,6 +25,8 @@ Optional<unsigned> getVVPOpcode(unsigned Opcode);
bool isVVPBinaryOp(unsigned Opcode);
+MVT splitVectorType(MVT VT);
+
bool isPackedVectorType(EVT SomeVT);
bool isMaskType(EVT SomeVT);
@@ -33,6 +35,10 @@ bool isMaskArithmetic(SDValue Op);
bool isVVPOrVEC(unsigned);
+bool supportsPackedMode(unsigned Opcode, EVT IdiomVT);
+
+bool isPackingSupportOpcode(unsigned Opc);
+
bool maySafelyIgnoreMask(SDValue Op);
/// The VE backend uses a two-staged process to lower and legalize vector
@@ -71,6 +77,11 @@ bool isLegalAVL(SDValue AVL);
// The AVL operand of this node.
SDValue getNodeAVL(SDValue);
+// Mask position of this node.
+Optional<int> getMaskPos(unsigned);
+
+SDValue getNodeMask(SDValue);
+
// Return the AVL operand of this node. If it is a LEGALAVL node, unwrap it.
// Return with the boolean whether unwrapping happened.
std::pair<SDValue, bool> getAnnotatedNodeAVL(SDValue);
@@ -93,6 +104,13 @@ enum class PackElem : int8_t {
Hi = 1 // Float (32, 0]
};
+struct VETargetMasks {
+ SDValue Mask;
+ SDValue AVL;
+ VETargetMasks(SDValue Mask = SDValue(), SDValue AVL = SDValue())
+ : Mask(Mask), AVL(AVL) {}
+};
+
class VECustomDAG {
SelectionDAG &DAG;
SDLoc DL;
@@ -135,8 +153,8 @@ class VECustomDAG {
/// } getNode
/// Packing {
- SDValue getUnpack(EVT DestVT, SDValue Vec, PackElem Part, SDValue AVL);
- SDValue getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, SDValue AVL);
+ SDValue getUnpack(EVT DestVT, SDValue Vec, PackElem Part, SDValue AVL) const;
+ SDValue getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, SDValue AVL) const;
/// } Packing
SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
@@ -148,6 +166,8 @@ class VECustomDAG {
// Wrap AVL in a LEGALAVL node (unless it is one already).
SDValue annotateLegalAVL(SDValue AVL) const;
+ VETargetMasks getTargetSplitMask(SDValue RawMask, SDValue RawAVL,
+ PackElem Part) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 38182dca7ba76..0e3f2eb522829 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1681,6 +1681,15 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
TargetLowering::LegalizeAction
VETargetLowering::getCustomOperationAction(SDNode &Op) const {
+ // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
+ // these operations (transform nodes such that their AVL parameter refers to
+  // packs of 64bit, instead of number of elements).
+
+ // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
+ // re-visit them.
+ if (isPackingSupportOpcode(Op.getOpcode()))
+ return Legal;
+
// Custom lower to legalize AVL for packed mode.
if (isVVPOrVEC(Op.getOpcode()))
return Custom;
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 604f34fa2086a..cc7a156d5b937 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -187,6 +187,7 @@ class VETargetLowering : public TargetLowering {
/// VVP Lowering {
SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue splitVectorOp(SDValue Op, VECustomDAG &CDAG) const;
SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const;
SDValue splitMaskArithmetic(SDValue Op, SelectionDAG &DAG) const;
/// } VVPLowering
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index e17b418201c65..71199717a3a2b 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -125,3 +125,26 @@ def : Pat<(v512i1 (vec_pack v256i1:$vlo, v256i1:$vhi, (i32 srcvalue))),
(v512i1 (IMPLICIT_DEF)),
$vlo, sub_vm_odd),
$vhi, sub_vm_even)>;
+
+// v256.32 <> v512.32
+multiclass Packing<ValueType PackVT> {
+ // no-op unpacks
+ def : Pat<(v256i32 (vec_unpack_lo PackVT:$vp, (i32 srcvalue))),
+ (COPY_TO_REGCLASS $vp, V64)>;
+ def : Pat<(v256f32 (vec_unpack_hi PackVT:$vp, (i32 srcvalue))),
+ (COPY_TO_REGCLASS $vp, V64)>;
+
+ // shuffle unpacks
+ def : Pat<(v256f32 (vec_unpack_lo PackVT:$vp, i32:$avl)),
+ (VSHFvvil $vp, $vp, 4, $avl)>; // always pick lo
+ def : Pat<(v256i32 (vec_unpack_hi PackVT:$vp, i32:$avl)),
+ (VSHFvvil $vp, $vp, 0, $avl)>; // always pick hi
+}
+
+defm : Packing<v512i32>;
+defm : Packing<v512f32>;
+
+def : Pat<(v512i32 (vec_pack v256i32:$vlo, v256i32:$vhi, i32:$avl)),
+ (VSHFvvil $vlo, $vhi, 13, $avl)>;
+def : Pat<(v512f32 (vec_pack v256f32:$vlo, v256f32:$vhi, i32:$avl)),
+ (VSHFvvil $vlo, $vhi, 8, $avl)>;
diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp
index 735f65bf4c9a3..54fdd9f3ac543 100644
--- a/llvm/lib/Target/VE/VVPISelLowering.cpp
+++ b/llvm/lib/Target/VE/VVPISelLowering.cpp
@@ -21,10 +21,68 @@ using namespace llvm;
SDValue VETargetLowering::legalizeInternalVectorOp(SDValue Op,
SelectionDAG &DAG) const {
VECustomDAG CDAG(DAG, Op);
+
+ EVT IdiomVT = Op.getValueType();
+ if (isPackedVectorType(IdiomVT) &&
+ !supportsPackedMode(Op.getOpcode(), IdiomVT))
+ return splitVectorOp(Op, CDAG);
+
// TODO: Implement odd/even splitting.
return legalizePackedAVL(Op, CDAG);
}
+SDValue VETargetLowering::splitVectorOp(SDValue Op, VECustomDAG &CDAG) const {
+ MVT ResVT = splitVectorType(Op.getValue(0).getSimpleValueType());
+
+ auto AVLPos = getAVLPos(Op->getOpcode());
+ auto MaskPos = getMaskPos(Op->getOpcode());
+
+ SDValue PackedMask = getNodeMask(Op);
+ auto AVLPair = getAnnotatedNodeAVL(Op);
+ SDValue PackedAVL = AVLPair.first;
+  assert(!AVLPair.second && "Expecting non pack-legalized operation");
+
+ // request the parts
+ SDValue PartOps[2];
+
+ SDValue UpperPartAVL; // we will use this for packing things back together
+ for (PackElem Part : {PackElem::Hi, PackElem::Lo}) {
+ // VP ops already have an explicit mask and AVL. When expanding from non-VP
+ // attach those additional inputs here.
+ auto SplitTM = CDAG.getTargetSplitMask(PackedMask, PackedAVL, Part);
+
+ if (Part == PackElem::Hi)
+ UpperPartAVL = SplitTM.AVL;
+
+ // Attach non-predicating value operands
+ SmallVector<SDValue, 4> OpVec;
+ for (unsigned i = 0; i < Op.getNumOperands(); ++i) {
+ if (AVLPos && ((int)i) == *AVLPos)
+ continue;
+ if (MaskPos && ((int)i) == *MaskPos)
+ continue;
+
+ // Value operand
+ auto PackedOperand = Op.getOperand(i);
+ auto UnpackedOpVT = splitVectorType(PackedOperand.getSimpleValueType());
+ SDValue PartV =
+ CDAG.getUnpack(UnpackedOpVT, PackedOperand, Part, SplitTM.AVL);
+ OpVec.push_back(PartV);
+ }
+
+ // Add predicating args and generate part node.
+ OpVec.push_back(SplitTM.Mask);
+ OpVec.push_back(SplitTM.AVL);
+ // Emit legal VVP nodes.
+ PartOps[(int)Part] =
+ CDAG.getNode(Op.getOpcode(), ResVT, OpVec, Op->getFlags());
+ }
+
+ // Re-package vectors.
+ return CDAG.getPack(Op.getValueType(), PartOps[(int)PackElem::Lo],
+ PartOps[(int)PackElem::Hi], UpperPartAVL);
+}
+
SDValue VETargetLowering::legalizePackedAVL(SDValue Op,
VECustomDAG &CDAG) const {
LLVM_DEBUG(dbgs() << "::legalizePackedAVL\n";);
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index edb0cbe69efec..1ba602f4f2d36 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -38,31 +38,37 @@
ADD_BINARY_VVP_OP(VVP_##NAME,VP_##NAME,NAME)
#endif
+/// REGISTER_PACKED(OPC)
+/// \p OPC The VVP opcode of the operation.
+#ifndef REGISTER_PACKED
+#define REGISTER_PACKED(OPC)
+#endif
+
// Integer arithmetic.
-ADD_BINARY_VVP_OP_COMPACT(ADD)
-ADD_BINARY_VVP_OP_COMPACT(SUB)
+ADD_BINARY_VVP_OP_COMPACT(ADD) REGISTER_PACKED(VVP_ADD)
+ADD_BINARY_VVP_OP_COMPACT(SUB) REGISTER_PACKED(VVP_SUB)
ADD_BINARY_VVP_OP_COMPACT(MUL)
ADD_BINARY_VVP_OP_COMPACT(UDIV)
ADD_BINARY_VVP_OP_COMPACT(SDIV)
-ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA)
-ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL)
-ADD_BINARY_VVP_OP_COMPACT(SHL)
+ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA) REGISTER_PACKED(VVP_SRA)
+ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL) REGISTER_PACKED(VVP_SRL)
+ADD_BINARY_VVP_OP_COMPACT(SHL) REGISTER_PACKED(VVP_SHL)
-ADD_BINARY_VVP_OP_COMPACT(AND)
-ADD_BINARY_VVP_OP_COMPACT(OR)
-ADD_BINARY_VVP_OP_COMPACT(XOR)
+ADD_BINARY_VVP_OP_COMPACT(AND) REGISTER_PACKED(VVP_AND)
+ADD_BINARY_VVP_OP_COMPACT(OR) REGISTER_PACKED(VVP_OR)
+ADD_BINARY_VVP_OP_COMPACT(XOR) REGISTER_PACKED(VVP_XOR)
// FP arithmetic.
-ADD_BINARY_VVP_OP_COMPACT(FADD)
-ADD_BINARY_VVP_OP_COMPACT(FSUB)
-ADD_BINARY_VVP_OP_COMPACT(FMUL)
+ADD_BINARY_VVP_OP_COMPACT(FADD) REGISTER_PACKED(VVP_FADD)
+ADD_BINARY_VVP_OP_COMPACT(FSUB) REGISTER_PACKED(VVP_FSUB)
+ADD_BINARY_VVP_OP_COMPACT(FMUL) REGISTER_PACKED(VVP_FMUL)
ADD_BINARY_VVP_OP_COMPACT(FDIV)
ADD_VVP_OP(VVP_SETCC, SETCC)
// Shuffles.
-ADD_VVP_OP(VVP_SELECT,VSELECT)
+ADD_VVP_OP(VVP_SELECT,VSELECT) REGISTER_PACKED(VVP_SELECT)
HANDLE_VP_TO_VVP(VP_SELECT, VVP_SELECT)
HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
@@ -70,3 +76,4 @@ HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
#undef ADD_BINARY_VVP_OP_COMPACT
#undef ADD_VVP_OP
#undef HANDLE_VP_TO_VVP
+#undef REGISTER_PACKED
diff --git a/llvm/test/CodeGen/VE/Packed/vp_fdiv.ll b/llvm/test/CodeGen/VE/Packed/vp_fdiv.ll
new file mode 100644
index 0000000000000..50c3fa189ea85
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/vp_fdiv.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <512 x float> @llvm.vp.fdiv.v512f32(<512 x float>, <512 x float>, <512 x i1>, i32)
+
+define fastcc <512 x float> @test_vp_fdiv_v512f32_vv(<512 x float> %i0, <512 x float> %i1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v512f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s0, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v2, %v1, %v1, 4
+; CHECK-NEXT: vshf %v3, %v0, %v0, 4
+; CHECK-NEXT: vfdiv.s %v2, %v3, %v2, %vm3
+; CHECK-NEXT: adds.w.sx %s0, 1, %s0
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm2
+; CHECK-NEXT: vshf %v0, %v2, %v0, 8
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <512 x float> @llvm.vp.fdiv.v512f32(<512 x float> %i0, <512 x float> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x float> %r0
+}
+
+define fastcc <512 x float> @test_vp_fdiv_v512f32_rv(float %s0, <512 x float> %i1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v512f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s2, %s0, (32)1
+; CHECK-NEXT: srl %s0, %s0, 32
+; CHECK-NEXT: or %s0, %s0, %s2
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: adds.w.sx %s0, 1, %s1
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfdiv.s %v2, %v1, %v0, %vm2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v1, %v1, %v1, 4
+; CHECK-NEXT: vshf %v0, %v0, %v0, 4
+; CHECK-NEXT: vfdiv.s %v0, %v1, %v0, %vm3
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v0, %v0, %v2, 8
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <512 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <512 x float> %xins, <512 x float> undef, <512 x i32> zeroinitializer
+ %r0 = call <512 x float> @llvm.vp.fdiv.v512f32(<512 x float> %i0, <512 x float> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x float> %r0
+}
+
+define fastcc <512 x float> @test_vp_fdiv_v512f32_vr(<512 x float> %i0, float %s1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v512f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s2, %s0, (32)1
+; CHECK-NEXT: srl %s0, %s0, 32
+; CHECK-NEXT: or %s0, %s0, %s2
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: adds.w.sx %s0, 1, %s1
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfdiv.s %v2, %v0, %v1, %vm2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v1, %v1, %v1, 4
+; CHECK-NEXT: vshf %v0, %v0, %v0, 4
+; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm3
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v0, %v0, %v2, 8
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <512 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <512 x float> %yins, <512 x float> undef, <512 x i32> zeroinitializer
+ %r0 = call <512 x float> @llvm.vp.fdiv.v512f32(<512 x float> %i0, <512 x float> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x float> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Packed/vp_mul.ll b/llvm/test/CodeGen/VE/Packed/vp_mul.ll
new file mode 100644
index 0000000000000..26271eef99454
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/vp_mul.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <512 x i32> @llvm.vp.mul.v512i32(<512 x i32>, <512 x i32>, <512 x i1>, i32)
+
+define fastcc <512 x i32> @test_vp_v512i32(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v512i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s1, 1, %s0
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v2, %v1, %v1, 0
+; CHECK-NEXT: vshf %v3, %v0, %v0, 0
+; CHECK-NEXT: vmuls.w.sx %v2, %v3, %v2, %vm2
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmuls.w.sx %v0, %v0, %v1, %vm3
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v0, %v0, %v2, 13
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <512 x i32> @llvm.vp.mul.v512i32(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x i32> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Packed/vp_sdiv.ll b/llvm/test/CodeGen/VE/Packed/vp_sdiv.ll
new file mode 100644
index 0000000000000..24202eaecedb7
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/vp_sdiv.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <512 x i32> @llvm.vp.sdiv.v512i32(<512 x i32>, <512 x i32>, <512 x i1>, i32)
+
+define fastcc <512 x i32> @test_vp_sdiv_v512i32_vv(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v512i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s1, 1, %s0
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v2, %v1, %v1, 0
+; CHECK-NEXT: vshf %v3, %v0, %v0, 0
+; CHECK-NEXT: vdivs.w.sx %v2, %v3, %v2, %vm2
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vdivs.w.sx %v0, %v0, %v1, %vm3
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v0, %v0, %v2, 13
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <512 x i32> @llvm.vp.sdiv.v512i32(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x i32> %r0
+}
+
+define fastcc <512 x i32> @test_vp_sdiv_v512i32_rv(i32 %s0, <512 x i32> %i1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v512i32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: sll %s2, %s0, 32
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s0, %s0, %s2
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: adds.w.sx %s0, 1, %s1
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v2, %v1, %v1, 0
+; CHECK-NEXT: vshf %v3, %v0, %v0, 0
+; CHECK-NEXT: vdivs.w.sx %v2, %v2, %v3, %vm2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vdivs.w.sx %v0, %v1, %v0, %vm3
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v0, %v0, %v2, 13
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <512 x i32> undef, i32 %s0, i32 0
+ %i0 = shufflevector <512 x i32> %xins, <512 x i32> undef, <512 x i32> zeroinitializer
+ %r0 = call <512 x i32> @llvm.vp.sdiv.v512i32(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x i32> %r0
+}
+
+define fastcc <512 x i32> @test_vp_sdiv_v512i32_vr(<512 x i32> %i0, i32 %s1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v512i32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: sll %s2, %s0, 32
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s0, %s0, %s2
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: adds.w.sx %s0, 1, %s1
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v2, %v1, %v1, 0
+; CHECK-NEXT: vshf %v3, %v0, %v0, 0
+; CHECK-NEXT: vdivs.w.sx %v2, %v3, %v2, %vm2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vdivs.w.sx %v0, %v0, %v1, %vm3
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v0, %v0, %v2, 13
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <512 x i32> undef, i32 %s1, i32 0
+ %i1 = shufflevector <512 x i32> %yins, <512 x i32> undef, <512 x i32> zeroinitializer
+ %r0 = call <512 x i32> @llvm.vp.sdiv.v512i32(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x i32> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Packed/vp_udiv.ll b/llvm/test/CodeGen/VE/Packed/vp_udiv.ll
new file mode 100644
index 0000000000000..80e1729bf64a0
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/vp_udiv.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <512 x i32> @llvm.vp.udiv.v512i32(<512 x i32>, <512 x i32>, <512 x i1>, i32)
+
+define fastcc <512 x i32> @test_vp_udiv_v512i32_vv(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v512i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s1, 1, %s0
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v2, %v1, %v1, 0
+; CHECK-NEXT: vshf %v3, %v0, %v0, 0
+; CHECK-NEXT: vdivu.w %v2, %v3, %v2, %vm2
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vdivu.w %v0, %v0, %v1, %vm3
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vshf %v0, %v0, %v2, 13
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <512 x i32> @llvm.vp.udiv.v512i32(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x i32> %r0
+}
+
+define fastcc <512 x i32> @test_vp_udiv_v512i32_rv(i32 %s0, <512 x i32> %i1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v512i32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: sll %s2, %s0, 32
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s0, %s0, %s2
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: adds.w.sx %s0, 1, %s1
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v2, %v1, %v1, 0
+; CHECK-NEXT: vshf %v3, %v0, %v0, 0
+; CHECK-NEXT: vdivu.w %v2, %v2, %v3, %vm2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vdivu.w %v0, %v1, %v0, %vm3
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v0, %v0, %v2, 13
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <512 x i32> undef, i32 %s0, i32 0
+ %i0 = shufflevector <512 x i32> %xins, <512 x i32> undef, <512 x i32> zeroinitializer
+ %r0 = call <512 x i32> @llvm.vp.udiv.v512i32(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x i32> %r0
+}
+
+define fastcc <512 x i32> @test_vp_udiv_v512i32_vr(<512 x i32> %i0, i32 %s1, <512 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v512i32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: sll %s2, %s0, 32
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s0, %s0, %s2
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: adds.w.sx %s0, 1, %s1
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v2, %v1, %v1, 0
+; CHECK-NEXT: vshf %v3, %v0, %v0, 0
+; CHECK-NEXT: vdivu.w %v2, %v3, %v2, %vm2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: srl %s1, %s1, 1
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vdivu.w %v0, %v0, %v1, %vm3
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vshf %v0, %v0, %v2, 13
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <512 x i32> undef, i32 %s1, i32 0
+ %i1 = shufflevector <512 x i32> %yins, <512 x i32> undef, <512 x i32> zeroinitializer
+ %r0 = call <512 x i32> @llvm.vp.udiv.v512i32(<512 x i32> %i0, <512 x i32> %i1, <512 x i1> %m, i32 %n)
+ ret <512 x i32> %r0
+}
More information about the llvm-commits
mailing list