[llvm] 33bf119 - [llvm][CodeGen][aarch64] Add contiguous prefetch intrinsics for SVE.
Francesco Petrogalli via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 12:28:03 PST 2020
Author: Francesco Petrogalli
Date: 2020-02-21T20:22:25Z
New Revision: 33bf1196475cbc9b84914c41308cf252764803ee
URL: https://github.com/llvm/llvm-project/commit/33bf1196475cbc9b84914c41308cf252764803ee
DIFF: https://github.com/llvm/llvm-project/commit/33bf1196475cbc9b84914c41308cf252764803ee.diff
LOG: [llvm][CodeGen][aarch64] Add contiguous prefetch intrinsics for SVE.
Summary: The patch covers both the register/register and the register/immediate addressing modes; an illustrative IR usage sketch follows the review metadata below.
Reviewers: efriedma, andwar, sdesmalen
Reviewed By: sdesmalen
Subscribers: sdesmalen, tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D74581
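As an illustrative sketch (not part of the patch; the function names are made up, but the calls mirror the tests added below), the same intrinsic covers the plain base-register, register + immediate ("mul vl") and register + register forms:

    define void @sketch_prfb(<vscale x 16 x i1> %pg, i8* %base) {
      ; expected lowering: prfb pldl1keep, p0, [x0]
      call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 0)
      ret void
    }

    define void @sketch_prfb_vnum(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
      ; expected lowering: prfb pldl1keep, p0, [x0, #31, mul vl]
      %gep = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 31
      %addr = bitcast <vscale x 16 x i8>* %gep to i8*
      call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %addr, i32 0)
      ret void
    }

    define void @sketch_prfb_rr(<vscale x 16 x i1> %pg, i8* %base, i64 %offset) {
      ; expected lowering: prfb pldl1keep, p0, [x0, x1]
      %addr = getelementptr i8, i8* %base, i64 %offset
      call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %addr, i32 0)
      ret void
    }

    declare void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1>, i8*, i32)

The last i32 operand is the prefetch operation (prfop) immediate; the new test file exercises the pldl*/pstl* encodings (0-5 and 8-13) as well as the limits of the signed immediate form.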
Added:
llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a3234b3bdd5a..403e58846546 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1256,6 +1256,14 @@ def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredFaultingLoad_Intrinsic;
def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
+//
+// Prefetch
+//
+
+def int_aarch64_sve_prf : Intrinsic<[], [llvm_anyvector_ty,
+ llvm_ptr_ty,
+ llvm_i32_ty], [IntrArgMemOnly]>;
+
//
// Scalar to vector operations
//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ee52e2f3f8b8..9c6703129eb0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4439,6 +4439,46 @@ FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
return new AArch64DAGToDAGISel(TM, OptLevel);
}
+/// When \p PredVT is a scalable vector predicate in the form
+/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
+/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. If the input
+/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
+/// EVT.
+static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) {
+ if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
+ return EVT();
+
+ const unsigned NumElts = PredVT.getVectorNumElements();
+
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return EVT();
+
+ EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / NumElts);
+ EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, NumElts, /*IsScalable=*/true);
+ return MemVT;
+}
+
+/// Return the EVT of the data associated with a memory operation in \p
+/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
+static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
+ if (isa<MemSDNode>(Root))
+ return cast<MemSDNode>(Root)->getMemoryVT();
+
+ const unsigned Opcode = Root->getOpcode();
+ if (Opcode != ISD::INTRINSIC_VOID)
+ return EVT();
+
+ const unsigned IntNo =
+ cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
+ if (IntNo != Intrinsic::aarch64_sve_prf)
+ return EVT();
+
+ // We are using an SVE prefetch intrinsic. Type must be inferred
+ // from the width of the predicate.
+ return getPackedVectorTypeFromPredicateType(
+ Ctx, Root->getOperand(2)->getValueType(0));
+}
+
/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
@@ -4446,9 +4486,10 @@ template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
SDValue &Base,
SDValue &OffImm) {
- assert(isa<MemSDNode>(Root) && "Invalid node.");
- EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
+ const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
+ if (MemVT == EVT())
+ return false;
if (N.getOpcode() != ISD::ADD)
return false;
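As an illustrative aside (not code from the patch): the inference above pairs an nxv<M>i1 governing predicate with a packed element width of 128 / M bits, assuming the 128-bit SVE block size (AArch64::SVEBitsPerBlock), i.e. nxv16i1 -> nxv16i8, nxv8i1 -> nxv8i16, nxv4i1 -> nxv4i32 and nxv2i1 -> nxv2i64. That width is what steers selection between prfb/prfh/prfw/prfd; a minimal sketch (illustrative function name, mirroring the scalar-contiguous tests below):

    define void @sketch_element_width(<vscale x 8 x i1> %pg, i8* %base) {
      ; an 8-lane predicate implies 16-bit elements, so prfh is expected:
      ; prfh pldl1keep, p0, [x0]
      call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %pg, i8* %base, i32 0)
      ret void
    }

    declare void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1>, i8*, i32)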
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d9c29f136959..61e48a75f8f2 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -819,6 +819,29 @@ let Predicates = [HasSVE] in {
def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
+multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
+ // reg + imm
+ let AddedComplexity = 2 in {
+ def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)),
+ (RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, simm6s1:$offset)>;
+ }
+
+ // reg + reg
+ let AddedComplexity = 1 in {
+ def _reg_reg : Pat<(prefetch (PredTy PPR_3b:$gp), (AddrCP GPR64sp:$base, GPR64:$index), (i32 sve_prfop:$prfop)),
+ (RegRegInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, GPR64:$index)>;
+ }
+
+ // default fallback
+ def _default : Pat<(prefetch (PredTy PPR_3b:$gp), GPR64:$base, (i32 sve_prfop:$prfop)),
+ (RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, (i64 0))>;
+ }
+
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv16i1, PRFB_PRI, PRFB_PRR, 0, am_sve_regreg_lsl0>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv8i1, PRFH_PRI, PRFH_PRR, 1, am_sve_regreg_lsl1>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFS_PRR, 2, am_sve_regreg_lsl2>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv2i1, PRFD_PRI, PRFD_PRR, 3, am_sve_regreg_lsl3>;
+
// Gather prefetch using scaled 32-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.s, uxtw #1]
defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index b3a455cf9b82..7cb638356c4a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -6597,7 +6597,6 @@ multiclass sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> {
(!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
}
-
//===----------------------------------------------------------------------===//
// SVE Compute Vector Address Group
//===----------------------------------------------------------------------===//
@@ -7062,6 +7061,7 @@ class sve2_crypto_unary_op<bit opc, string asm>
/// Addressing modes
def am_sve_indexed_s4 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-8,7>", [], [SDNPWantRoot]>;
+def am_sve_indexed_s6 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-32,31>", [], [SDNPWantRoot]>;
def am_sve_regreg_lsl0 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<0>", []>;
def am_sve_regreg_lsl1 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<1>", []>;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
new file mode 100644
index 000000000000..f003c373ea63
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
@@ -0,0 +1,252 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Testing prfop encodings
+;
+define void @test_svprf_pldl1strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl1strm
+; CHECK: prfb pldl1strm, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 1)
+ ret void
+}
+
+define void @test_svprf_pldl2keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl2keep
+; CHECK: prfb pldl2keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 2)
+ ret void
+}
+
+define void @test_svprf_pldl2strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl2strm
+; CHECK: prfb pldl2strm, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 3)
+ ret void
+}
+
+define void @test_svprf_pldl3keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl3keep
+; CHECK: prfb pldl3keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 4)
+ ret void
+}
+
+define void @test_svprf_pldl3strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl3strm
+; CHECK: prfb pldl3strm, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 5)
+ ret void
+}
+
+define void @test_svprf_pstl1keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl1keep
+; CHECK: prfb pstl1keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 8)
+ ret void
+}
+
+define void @test_svprf_pstl1strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl1strm
+; CHECK: prfb pstl1strm, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 9)
+ ret void
+}
+
+define void @test_svprf_pstl2keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl2keep
+; CHECK: prfb pstl2keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 10)
+ ret void
+}
+
+define void @test_svprf_pstl2strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl2strm
+; CHECK: prfb pstl2strm, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 11)
+ ret void
+}
+
+define void @test_svprf_pstl3keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl3keep
+; CHECK: prfb pstl3keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 12)
+ ret void
+}
+
+define void @test_svprf_pstl3strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl3strm
+; CHECK: prfb pstl3strm, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 13)
+ ret void
+}
+
+;
+; Testing imm limits of SI form
+;
+
+define void @test_svprf_vnum_under(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
+; CHECK-LABEL: test_svprf_vnum_under
+; CHECK-NOT: prfb pstl3strm, p0, [x0, #-33, mul vl]
+entry:
+ %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 -33, i64 0
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
+ ret void
+}
+
+define void @test_svprf_vnum_min(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
+; CHECK-LABEL: test_svprf_vnum_min
+; CHECK: prfb pstl3strm, p0, [x0, #-32, mul vl]
+entry:
+ %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 -32, i64 0
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
+ ret void
+}
+
+define void @test_svprf_vnum_over(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
+; CHECK-LABEL: test_svprf_vnum_over
+; CHECK-NOT: prfb pstl3strm, p0, [x0, #32, mul vl]
+entry:
+ %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 32, i64 0
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
+ ret void
+}
+
+define void @test_svprf_vnum_max(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
+; CHECK-LABEL: test_svprf_vnum_max
+; CHECK: prfb pstl3strm, p0, [x0, #31, mul vl]
+entry:
+ %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 31, i64 0
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
+ ret void
+}
+
+;
+; scalar contiguous
+;
+
+define void @test_svprfb(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprfb
+; CHECK: prfb pldl1keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 0)
+ ret void
+}
+
+define void @test_svprfh(<vscale x 8 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprfh
+; CHECK: prfh pldl1keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %pg, i8* %base, i32 0)
+ ret void
+}
+
+define void @test_svprfw(<vscale x 4 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprfw
+; CHECK: prfw pldl1keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, i32 0)
+ ret void
+}
+
+define void @test_svprfd(<vscale x 2 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprfd
+; CHECK: prfd pldl1keep, p0, [x0]
+entry:
+ tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, i32 0)
+ ret void
+}
+
+;
+; scalar + imm contiguous
+;
+; imm form of prfb is tested above
+
+define void @test_svprfh_vnum(<vscale x 8 x i1> %pg, <vscale x 8 x i16>* %base) {
+; CHECK-LABEL: test_svprfh_vnum
+; CHECK: prfh pstl3strm, p0, [x0, #31, mul vl]
+entry:
+ %gep = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base, i64 31
+ %addr = bitcast <vscale x 8 x i16>* %gep to i8*
+ tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %pg, i8* %addr, i32 13)
+ ret void
+}
+
+define void @test_svprfw_vnum(<vscale x 4 x i1> %pg, <vscale x 4 x i32>* %base) {
+; CHECK-LABEL: test_svprfw_vnum
+; CHECK: prfw pstl3strm, p0, [x0, #31, mul vl]
+entry:
+ %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 31
+ %addr = bitcast <vscale x 4 x i32>* %gep to i8*
+ tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %pg, i8* %addr, i32 13)
+ ret void
+}
+
+define void @test_svprfd_vnum(<vscale x 2 x i1> %pg, <vscale x 2 x i64>* %base) {
+; CHECK-LABEL: test_svprfd_vnum
+; CHECK: prfd pstl3strm, p0, [x0, #31, mul vl]
+entry:
+ %gep = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 31
+ %addr = bitcast <vscale x 2 x i64>* %gep to i8*
+ tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %pg, i8* %addr, i32 13)
+ ret void
+}
+
+;
+; scalar + scaled scalar contiguous
+;
+
+define void @test_svprfb_ss(<vscale x 16 x i1> %pg, i8* %base, i64 %offset) {
+; CHECK-LABEL: test_svprfb_ss
+; CHECK: prfb pstl3strm, p0, [x0, x1]
+entry:
+ %addr = getelementptr i8, i8* %base, i64 %offset
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %addr, i32 13)
+ ret void
+}
+
+define void @test_svprfh_ss(<vscale x 8 x i1> %pg, i16* %base, i64 %offset) {
+; CHECK-LABEL: test_svprfh_ss
+; CHECK: prfh pstl3strm, p0, [x0, x1, lsl #1]
+entry:
+ %gep = getelementptr i16, i16* %base, i64 %offset
+ %addr = bitcast i16* %gep to i8*
+ tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %pg, i8* %addr, i32 13)
+ ret void
+}
+
+define void @test_svprfw_ss(<vscale x 4 x i1> %pg, i32* %base, i64 %offset) {
+; CHECK-LABEL: test_svprfw_ss
+; CHECK: prfw pstl3strm, p0, [x0, x1, lsl #2]
+entry:
+ %gep = getelementptr i32, i32* %base, i64 %offset
+ %addr = bitcast i32* %gep to i8*
+ tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %pg, i8* %addr, i32 13)
+ ret void
+}
+
+define void @test_svprfd_ss(<vscale x 2 x i1> %pg, i64* %base, i64 %offset) {
+; CHECK-LABEL: test_svprfd_ss
+; CHECK: prfd pstl3strm, p0, [x0, x1, lsl #3]
+entry:
+ %gep = getelementptr i64, i64* %base, i64 %offset
+ %addr = bitcast i64* %gep to i8*
+ tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %pg, i8* %addr, i32 13)
+ ret void
+}
+
+
+declare void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1>, i8*, i32)
+declare void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1>, i8*, i32)
+declare void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1>, i8*, i32)
+declare void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1>, i8*, i32)