[llvm] 5ceb0bc - [VE] Packed 32/64bit broadcast isel and tests
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 26 05:17:08 PST 2022
Author: Simon Moll
Date: 2022-01-26T14:16:06+01:00
New Revision: 5ceb0bc7eaccb318eb299ee308e01210a7da1d1e
URL: https://github.com/llvm/llvm-project/commit/5ceb0bc7eaccb318eb299ee308e01210a7da1d1e
DIFF: https://github.com/llvm/llvm-project/commit/5ceb0bc7eaccb318eb299ee308e01210a7da1d1e.diff
LOG: [VE] Packed 32/64bit broadcast isel and tests
Packed-mode broadcast of f32/i32 requires the subregister to be
replicated to the full I64 register first. Add repl_i32 and repl_f32 to
facilitate this.
Reviewed By: kaz7
Differential Revision: https://reviews.llvm.org/D117878
Added:
llvm/test/CodeGen/VE/Packed/vec_broadcast.ll
Modified:
llvm/lib/Target/VE/VECustomDAG.cpp
llvm/lib/Target/VE/VECustomDAG.h
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrInfo.td
llvm/lib/Target/VE/VEInstrPatternsVec.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
index 2f9976e426129..af3e4af138142 100644
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -19,6 +19,14 @@
namespace llvm {
+static const int StandardVectorWidth = 256;
+
+bool isPackedVectorType(EVT SomeVT) {
+ if (!SomeVT.isVector())
+ return false;
+ return SomeVT.getVectorNumElements() > StandardVectorWidth;
+}
+
/// \returns the VVP_* SDNode opcode corresponding to \p Opcode.
Optional<unsigned> getVVPOpcode(unsigned Opcode) {
switch (Opcode) {
@@ -51,6 +59,22 @@ SDValue VECustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget,
SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar,
SDValue AVL) const {
+ assert(ResultVT.isVector());
+ auto ScaVT = Scalar.getValueType();
+ assert(ScaVT != MVT::i1 && "TODO: Mask broadcasts");
+
+ if (isPackedVectorType(ResultVT)) {
+ // v512x packed mode broadcast
+ // Replicate the scalar reg (f32 or i32) onto the opposing half of the full
+ // scalar register. If it's an I64 type, assume that this has already
+ // happened.
+ if (ScaVT == MVT::f32) {
+ Scalar = getNode(VEISD::REPL_F32, MVT::i64, Scalar);
+ } else if (ScaVT == MVT::i32) {
+ Scalar = getNode(VEISD::REPL_I32, MVT::i64, Scalar);
+ }
+ }
+
return getNode(VEISD::VEC_BROADCAST, ResultVT, {Scalar, AVL});
}
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
index e78b5dda6828c..ddd6ce7833664 100644
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -25,6 +25,8 @@ Optional<unsigned> getVVPOpcode(unsigned Opcode);
bool isVVPBinaryOp(unsigned Opcode);
+bool isPackedVectorType(EVT SomeVT);
+
class VECustomDAG {
SelectionDAG &DAG;
SDLoc DL;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 3ab876aa05c99..9137c476777e8 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "VECustomDAG.h"
#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
+#include "VECustomDAG.h"
#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
@@ -899,6 +899,8 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(TS1AM)
TARGET_NODE_CASE(VEC_BROADCAST)
+ TARGET_NODE_CASE(REPL_I32)
+ TARGET_NODE_CASE(REPL_F32)
// Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
@@ -1642,8 +1644,7 @@ static SDValue getSplatValue(SDNode *N) {
SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
VECustomDAG CDAG(DAG, Op);
- unsigned NumEls = Op.getValueType().getVectorNumElements();
- MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
+ MVT ResultVT = Op.getSimpleValueType();
// If there is just one element, expand to INSERT_VECTOR_ELT.
unsigned UniqueIdx;
@@ -1651,17 +1652,17 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
auto ElemV = Op->getOperand(UniqueIdx);
SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
- return CDAG.getNode(ISD::INSERT_VECTOR_ELT, Op.getValueType(),
- {AccuV, ElemV, IdxV});
+ return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
}
// Else emit a broadcast.
if (SDValue ScalarV = getSplatValue(Op.getNode())) {
- // lower to VEC_BROADCAST
- MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
-
- auto AVL = CDAG.getConstant(NumEls, MVT::i32);
- return CDAG.getBroadcast(LegalResVT, Op.getOperand(0), AVL);
+ unsigned NumEls = ResultVT.getVectorNumElements();
+ // TODO: Legalize packed-mode AVL.
+ // For now, cap the AVL at 256.
+ auto CappedLength = std::min<unsigned>(256, NumEls);
+ auto AVL = CDAG.getConstant(CappedLength, MVT::i32);
+ return CDAG.getBroadcast(ResultVT, Op.getOperand(0), AVL);
}
// Expand
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index ec0f58b7a3f6d..09bd19e837172 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -40,6 +40,8 @@ enum NodeType : unsigned {
TS1AM, // A TS1AM instruction used for 1/2 bytes swap.
VEC_BROADCAST, // A vector broadcast instruction.
// 0: scalar value, 1: VL
+ REPL_I32,
+ REPL_F32, // Replicate subregister to other half.
// VVP_* nodes.
#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index c3abbe2cafab6..717427c3f48da 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1576,6 +1576,12 @@ def f2l : OutPatFrag<(ops node:$exp),
def l2f : OutPatFrag<(ops node:$exp),
(EXTRACT_SUBREG $exp, sub_f32)>;
+// Zero out subregisters.
+def zero_i32 : OutPatFrag<(ops node:$expr),
+ (ANDrm $expr, 32)>;
+def zero_f32 : OutPatFrag<(ops node:$expr),
+ (ANDrm $expr, !add(32, 64))>;
+
// Small immediates.
def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>;
def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>;
@@ -2287,6 +2293,16 @@ class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
[SDTCisVec<0>, IsVLVT<2>]>>;
+// replicate upper 32bit to lower 32bit (f32 scalar replication).
+def repl_f32 : SDNode<"VEISD::REPL_F32",
+ SDTypeProfile<1, 1,
+ [SDTCisInt<0>, SDTCisFP<1>]>>;
+// replicate lower 32bit to upper 32bit (i32 scalar replication).
+def repl_i32 : SDNode<"VEISD::REPL_I32",
+ SDTypeProfile<1, 1,
+ [SDTCisInt<0>, SDTCisInt<1>]>>;
+
+
// Whether this is an all-true mask (assuming undef-bits above VL are all-true).
def true_mask : PatLeaf<
(vec_broadcast (i32 nonzero), (i32 srcvalue))>;
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index dc3c913c918af..6c5b80315efb5 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -15,6 +15,17 @@
// Instruction format superclass
//===----------------------------------------------------------------------===//
+// Sub-register replication for packed broadcast.
+def: Pat<(i64 (repl_f32 f32:$val)),
+ (ORrr
+ (SRLri (f2l $val), 32),
+ (zero_i32 (f2l $val)))>;
+def: Pat<(i64 (repl_i32 i32:$val)),
+ (ORrr
+ (zero_f32 (i2l $val)),
+ (SLLri (i2l $val), 32))>;
+
+
multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
SDNodeXForm ImmCast, OutPatFrag SuperRegCast> {
// VBRDil
@@ -89,3 +100,8 @@ defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>;
defm : patterns_elem64<v256i64, i64, simm7, LO7>;
defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>;
+
+defm : vbrd_elem64<v512i32, i64, simm7, LO7>;
+defm : vbrd_elem64<v512f32, i64, simm7, LO7>;
+defm : vbrd_elem64<v512i32, f64, simm7fp, LO7FP>;
+defm : vbrd_elem64<v512f32, f64, simm7fp, LO7FP>;
diff --git a/llvm/test/CodeGen/VE/Packed/vec_broadcast.ll b/llvm/test/CodeGen/VE/Packed/vec_broadcast.ll
new file mode 100644
index 0000000000000..ed41f4fcb9747
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/vec_broadcast.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+define fastcc <512 x i32> @brd_v512i32(i32 %s) {
+; CHECK-LABEL: brd_v512i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: sll %s1, %s0, 32
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s0, %s0, %s1
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %val = insertelement <512 x i32> undef, i32 %s, i32 0
+ %ret = shufflevector <512 x i32> %val, <512 x i32> undef, <512 x i32> zeroinitializer
+ ret <512 x i32> %ret
+}
+
+define fastcc <512 x i32> @brdi_v512i32() {
+; CHECK-LABEL: brdi_v512i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: or %s0, 17, (0)1
+; CHECK-NEXT: sll %s1, %s0, 32
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s0, %s0, %s1
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %val = insertelement <512 x i32> undef, i32 17, i32 0
+ %ret = shufflevector <512 x i32> %val, <512 x i32> undef, <512 x i32> zeroinitializer
+ ret <512 x i32> %ret
+}
+
+define fastcc <512 x float> @brd_v512f32(float %s) {
+; CHECK-LABEL: brd_v512f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s0, (32)1
+; CHECK-NEXT: srl %s0, %s0, 32
+; CHECK-NEXT: or %s0, %s0, %s1
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %val = insertelement <512 x float> undef, float %s, i32 0
+ %ret = shufflevector <512 x float> %val, <512 x float> undef, <512 x i32> zeroinitializer
+ ret <512 x float> %ret
+}
+
+define fastcc <512 x float> @brdi_v512f32() {
+; CHECK-LABEL: brdi_v512f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea.sl %s0, 0
+; CHECK-NEXT: and %s1, %s0, (32)1
+; CHECK-NEXT: srl %s0, %s0, 32
+; CHECK-NEXT: or %s0, %s0, %s1
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %val = insertelement <512 x float> undef, float 0.e+00, i32 0
+ %ret = shufflevector <512 x float> %val, <512 x float> undef, <512 x i32> zeroinitializer
+ ret <512 x float> %ret
+}
More information about the llvm-commits
mailing list