[llvm] 72848f2 - [AArch64][SVE] Add predicate reinterpret intrinsics
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 25 02:24:35 PST 2020
Author: Cullen Rhodes
Date: 2020-02-25T10:24:06Z
New Revision: 72848f26b434624f612834443e8a7c0239026340
URL: https://github.com/llvm/llvm-project/commit/72848f26b434624f612834443e8a7c0239026340
DIFF: https://github.com/llvm/llvm-project/commit/72848f26b434624f612834443e8a7c0239026340.diff
LOG: [AArch64][SVE] Add predicate reinterpret intrinsics
Summary:
Implements the following intrinsics:
* llvm.aarch64.sve.convert.to.svbool
* llvm.aarch64.sve.convert.from.svbool
For converting the ACLE svbool_t type (<vscale x 16 x i1>) to and from the
other predicate types: <vscale x 8 x i1>, <vscale x 4 x i1> and <vscale x 2 x i1>.
Reviewers: sdesmalen, kmclaughlin, efriedma, dancgr, rengolin
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D74471
Added:
llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 99a8926472c9..228b756ba2a1 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1691,6 +1691,18 @@ def int_aarch64_sve_ptest_any : AdvSIMD_SVE_PTEST_Intrinsic;
def int_aarch64_sve_ptest_first : AdvSIMD_SVE_PTEST_Intrinsic;
def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;
+//
+// Reinterpreting data
+//
+
+def int_aarch64_sve_convert_from_svbool : Intrinsic<[llvm_anyvector_ty],
+ [llvm_nxv16i1_ty],
+ [IntrNoMem]>;
+
+def int_aarch64_sve_convert_to_svbool : Intrinsic<[llvm_nxv16i1_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+
//
// Gather loads: scalar base + vector offsets
//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 32d594450ca6..f5fc141d9cb5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1345,6 +1345,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
case AArch64ISD::REV: return "AArch64ISD::REV";
+ case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";
case AArch64ISD::TBL: return "AArch64ISD::TBL";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
@@ -2949,6 +2950,12 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
+static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ int Pattern) {
+ return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
+ DAG.getTargetConstant(Pattern, DL, MVT::i32));
+}
+
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -3038,6 +3045,24 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1));
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
+ case Intrinsic::aarch64_sve_convert_from_svbool:
+ return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_convert_to_svbool: {
+ EVT OutVT = Op.getValueType();
+ EVT InVT = Op.getOperand(1).getValueType();
+ // Return the operand if the cast isn't changing type,
+ // i.e. <n x 16 x i1> -> <n x 16 x i1>
+ if (InVT == OutVT)
+ return Op.getOperand(1);
+ // Otherwise, zero the newly introduced lanes.
+ SDValue Reinterpret =
+ DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1));
+ SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all);
+ SDValue MaskReinterpret =
+ DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask);
+ return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret);
+ }
case Intrinsic::aarch64_sve_insr: {
SDValue Scalar = Op.getOperand(2);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 4923d3b77ae3..fecc78f77d9c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -218,6 +218,8 @@ enum NodeType : unsigned {
DUP_PRED,
INDEX_VECTOR,
+ REINTERPRET_CAST,
+
LDNF1,
LDNF1S,
LDFF1,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index cc3995cca975..4c7904cf7e5e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -115,6 +115,8 @@ def AArch64dup_pred : SDNode<"AArch64ISD::DUP_PRED", SDT_AArch64DUP_PRED>;
def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;
def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
+def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
+
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -1304,6 +1306,29 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv8i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv8i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv8i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv4i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv4i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv4i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv2i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+
+ def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
+ def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_H 31), PPR:$Ps1, PPR:$Ps2)>;
+ def : Pat<(nxv4i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>;
+ def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>;
+
// Add more complex addressing modes here as required
multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load,
Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
new file mode 100644
index 000000000000..5eb75ef00e6d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Converting to svbool_t (<vscale x 16 x i1>)
+;
+
+define <vscale x 16 x i1> @reinterpret_bool_from_b(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_from_b:
+; CHECK: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv16i1(<vscale x 16 x i1> %pg)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_bool_from_h(<vscale x 8 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_from_h:
+; CHECK: ptrue p1.h
+; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_bool_from_s(<vscale x 4 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_from_s:
+; CHECK: ptrue p1.s
+; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %pg)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_bool_from_d(<vscale x 2 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_from_d:
+; CHECK: ptrue p1.d
+; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ ret <vscale x 16 x i1> %out
+}
+
+;
+; Converting from svbool_t
+;
+
+define <vscale x 16 x i1> @reinterpret_bool_to_b(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_to_b:
+; CHECK: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv16i1(<vscale x 16 x i1> %pg)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @reinterpret_bool_to_h(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_to_h:
+; CHECK: ret
+ %out = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @reinterpret_bool_to_s(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_to_s:
+; CHECK: ret
+ %out = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @reinterpret_bool_to_d(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_to_d:
+; CHECK: ret
+ %out = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ ret <vscale x 2 x i1> %out
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
More information about the llvm-commits
mailing list