[llvm] 82b51a1 - [AArch64] Support SLC in ACLE prefetch intrinsics
Archibald Elliott via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 16 06:42:56 PST 2022
Author: Archibald Elliott
Date: 2022-12-16T14:42:27Z
New Revision: 82b51a14280414a53413ed62c001d2c589c649c3
URL: https://github.com/llvm/llvm-project/commit/82b51a14280414a53413ed62c001d2c589c649c3
DIFF: https://github.com/llvm/llvm-project/commit/82b51a14280414a53413ed62c001d2c589c649c3.diff
LOG: [AArch64] Support SLC in ACLE prefetch intrinsics
This change:
- Modifies the ACLE code to allow the new SLC value (3) for the prefetch
target.
- Introduces a new intrinsic, @llvm.aarch64.prefetch which matches the
PRFM family instructions much more closely, and can represent all
values for the PRFM immediate.
The target-independent @llvm.prefetch intrinsic does not have enough
information for us to be able to lower to it from the ACLE intrinsics
correctly.
- Lowers the acle calls to the new intrinsic on aarch64 (the ARM
lowering is unchanged).
- Implements code generation for the new intrinsic in both SelectionDAG
and GlobalISel. We specifically choose to continue to support lowering
the target-independent @llvm.prefetch intrinsic so that other
frontends can continue to use it.
Differential Revision: https://reviews.llvm.org/D139443
Added:
llvm/test/CodeGen/AArch64/arm64-prefetch-new.ll
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGen/arm_acle.c
clang/test/CodeGen/builtins-arm64.c
clang/test/Sema/builtins-arm64.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/IR/Verifier.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bd96108d6dc00..55aa9f6acd0fe 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9749,29 +9749,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
}
- if (BuiltinID == clang::AArch64::BI__builtin_arm_prefetch) {
- Value *Address = EmitScalarExpr(E->getArg(0));
- Value *RW = EmitScalarExpr(E->getArg(1));
- Value *CacheLevel = EmitScalarExpr(E->getArg(2));
- Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
- Value *IsData = EmitScalarExpr(E->getArg(4));
-
- Value *Locality = nullptr;
- if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
- // Temporal fetch, needs to convert cache level to locality.
- Locality = llvm::ConstantInt::get(Int32Ty,
- -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
- } else {
- // Streaming fetch.
- Locality = llvm::ConstantInt::get(Int32Ty, 0);
- }
-
- // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
- // PLDL3STRM or PLDL2STRM.
- Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
- return Builder.CreateCall(F, {Address, RW, Locality, IsData});
- }
-
if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
assert((getContext().getTypeSize(E->getType()) == 32) &&
"rbit of unusual size!");
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 9f0c8491ad666..86f1c3c42598a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3226,9 +3226,9 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1) ||
- SemaBuiltinConstantArgRange(TheCall, 2, 0, 2) ||
- SemaBuiltinConstantArgRange(TheCall, 3, 0, 1) ||
- SemaBuiltinConstantArgRange(TheCall, 4, 0, 1);
+ SemaBuiltinConstantArgRange(TheCall, 2, 0, 3) ||
+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1) ||
+ SemaBuiltinConstantArgRange(TheCall, 4, 0, 1);
}
if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 3697d297f89f1..d3ea9ded6583d 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -168,10 +168,15 @@ void test_swp(uint32_t x, volatile void *p) {
/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
-// ARM-LABEL: @test_pld(
-// ARM-NEXT: entry:
-// ARM-NEXT: call void @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 1)
-// ARM-NEXT: ret void
+// AArch32-LABEL: @test_pld(
+// AArch32-NEXT: entry:
+// AArch32-NEXT: call void @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 1)
+// AArch32-NEXT: ret void
+//
+// AArch64-LABEL: @test_pld(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 1)
+// AArch64-NEXT: ret void
//
void test_pld() {
__pld(0);
@@ -184,7 +189,7 @@ void test_pld() {
//
// AArch64-LABEL: @test_pldx(
// AArch64-NEXT: entry:
-// AArch64-NEXT: call void @llvm.prefetch.p0(ptr null, i32 1, i32 1, i32 1)
+// AArch64-NEXT: call void @llvm.aarch64.prefetch(ptr null, i32 1, i32 2, i32 0, i32 1)
// AArch64-NEXT: ret void
//
void test_pldx() {
@@ -192,10 +197,15 @@ void test_pldx() {
}
/* 8.6.2 Instruction prefetch */
-// ARM-LABEL: @test_pli(
-// ARM-NEXT: entry:
-// ARM-NEXT: call void @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
-// ARM-NEXT: ret void
+// AArch32-LABEL: @test_pli(
+// AArch32-NEXT: entry:
+// AArch32-NEXT: call void @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
+// AArch32-NEXT: ret void
+//
+// AArch64-LABEL: @test_pli(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 0)
+// AArch64-NEXT: ret void
//
void test_pli() {
__pli(0);
@@ -208,7 +218,7 @@ void test_pli() {
//
// AArch64-LABEL: @test_plix(
// AArch64-NEXT: entry:
-// AArch64-NEXT: call void @llvm.prefetch.p0(ptr null, i32 0, i32 1, i32 0)
+// AArch64-NEXT: call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 2, i32 0, i32 0)
// AArch64-NEXT: ret void
//
void test_plix() {
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 157bb73236a1e..05ea1c719edff 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -47,16 +47,19 @@ void barriers(void) {
void prefetch(void) {
__builtin_arm_prefetch(0, 1, 2, 0, 1); // pstl3keep
- // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 1, i32 1, i32 1)
+ // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 1, i32 2, i32 0, i32 1)
__builtin_arm_prefetch(0, 0, 0, 1, 1); // pldl1keep
- // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 0, i32 1)
+ // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 1, i32 1)
__builtin_arm_prefetch(0, 0, 0, 1, 1); // pldl1strm
- // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 0, i32 1)
+ // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 1, i32 1)
__builtin_arm_prefetch(0, 0, 0, 0, 0); // plil1keep
- // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
+ // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 0)
+
+ __builtin_arm_prefetch(0, 0, 3, 0, 1); // pldslckeep
+ // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
}
__attribute__((target("v8.5a")))
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index ba4076027bbdc..e711121f7260f 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -25,7 +25,7 @@ void test_memory_barriers(void) {
void test_prefetch(void) {
__builtin_arm_prefetch(0, 2, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
- __builtin_arm_prefetch(0, 0, 3, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_prefetch(0, 0, 4, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
__builtin_arm_prefetch(0, 0, 0, 2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
__builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index c65446e8bafab..a6ecbb66a5d7b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -65,6 +65,14 @@ def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;
+
+def int_aarch64_prefetch : Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>,
+ ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>
+ ]>,
+ ClangBuiltin<"__builtin_arm_prefetch">;
+
//===----------------------------------------------------------------------===//
// Data Barrier Instructions
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 093b30d3b55ef..910d1db1ccfb1 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5754,6 +5754,17 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
&Call);
break;
}
+ case Intrinsic::aarch64_prefetch: {
+ Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
+ "write argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+ "target argument to llvm.aarch64.prefetch must be 0-3", Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2,
+ "stream argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue() < 2,
+ "isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+ break;
+ }
};
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f6501a4439d14..0289da50db366 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1189,9 +1189,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget->hasSME())
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
-
if (Subtarget->hasSVE()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -1520,6 +1517,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
}
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
IsStrictFPEnabled = true;
@@ -4724,6 +4723,44 @@ static std::optional<SMEAttrs> getCalleeAttrsFromExternalFunction(SDValue V) {
return std::nullopt;
}
+SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ SDLoc DL(Op);
+ switch (IntNo) {
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::aarch64_prefetch: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Addr = Op.getOperand(2);
+
+ unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Locality = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ unsigned IsStream = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ unsigned IsData = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
+ (!IsData << 3) | // IsDataCache bit
+ (Locality << 1) | // Cache level bits
+ (unsigned)IsStream; // Stream bit
+
+ return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
+ DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
+ }
+ case Intrinsic::aarch64_sme_za_enable:
+ return DAG.getNode(
+ AArch64ISD::SMSTART, DL, MVT::Other,
+ Op->getOperand(0), // Chain
+ DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+ case Intrinsic::aarch64_sme_za_disable:
+ return DAG.getNode(
+ AArch64ISD::SMSTOP, DL, MVT::Other,
+ Op->getOperand(0), // Chain
+ DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+ }
+}
+
SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
@@ -4754,18 +4791,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
// changed.
return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
}
- case Intrinsic::aarch64_sme_za_enable:
- return DAG.getNode(
- AArch64ISD::SMSTART, DL, MVT::Other,
- Op->getOperand(0), // Chain
- DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
- case Intrinsic::aarch64_sme_za_disable:
- return DAG.getNode(
- AArch64ISD::SMSTOP, DL, MVT::Other,
- Op->getOperand(0), // Chain
- DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
}
}
@@ -5872,11 +5897,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED);
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED);
- case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ATOMIC_STORE:
if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
assert(Subtarget->hasLSE2());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 49d43d5bce707..2869604815a78 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -960,6 +960,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
bool
isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 48d6553f59bf1..8a760a6cb5655 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1070,6 +1070,24 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_prefetch: {
+ MachineIRBuilder MIB(MI);
+ auto &AddrVal = MI.getOperand(1);
+
+ int64_t IsWrite = MI.getOperand(2).getImm();
+ int64_t Target = MI.getOperand(3).getImm();
+ int64_t IsStream = MI.getOperand(4).getImm();
+ int64_t IsData = MI.getOperand(5).getImm();
+
+ unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
+ (!IsData << 3) | // IsDataCache bit
+ (Target << 1) | // Cache level bits
+ (unsigned)IsStream; // Stream bit
+
+ MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
+ MI.eraseFromParent();
+ return true;
+ }
}
return true;
diff --git a/llvm/test/CodeGen/AArch64/arm64-prefetch-new.ll b/llvm/test/CodeGen/AArch64/arm64-prefetch-new.ll
new file mode 100644
index 0000000000000..da7d74afe56be
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-prefetch-new.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s
+
+@a = internal global ptr null, align 8
+@b = external global ptr, align 8
+
+define void @test(ptr %i, i32 %j) nounwind ssp {
+entry:
+ ; CHECK-LABEL: @test
+ %j.addr = alloca i32, align 4
+ store i32 %j, ptr %j.addr, align 4, !tbaa !0
+ %tmp = bitcast ptr %j.addr to ptr
+
+ %i.next = getelementptr i8, ptr %i, i64 2
+
+ ; Verify prefetching works for all the different kinds of pointers we might
+ ; want to prefetch.
+
+ ; CHECK: prfm pldl1keep,
+ call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 1)
+
+ ; CHECK: prfum pldl1keep,
+ call void @llvm.aarch64.prefetch(ptr %tmp, i32 0, i32 0, i32 0, i32 1)
+
+ ; CHECK: prfm pldl1keep,
+ call void @llvm.aarch64.prefetch(ptr %i, i32 0, i32 0, i32 0, i32 1)
+
+ ; CHECK: prfum pldl1keep,
+ call void @llvm.aarch64.prefetch(ptr %i.next, i32 0, i32 0, i32 0, i32 1)
+
+ ; CHECK: prfm pldl1keep,
+ call void @llvm.aarch64.prefetch(ptr @a, i32 0, i32 0, i32 0, i32 1)
+
+ ; CHECK: prfm pldl1keep,
+ call void @llvm.aarch64.prefetch(ptr @b, i32 0, i32 0, i32 0, i32 1)
+
+ ; Verify that we can generate every single valid prefetch value.
+
+ ; CHECK: prfm pstl1keep,
+ call void @llvm.aarch64.prefetch(ptr null, i32 1, i32 0, i32 0, i32 1)
+
+ ; CHECK: prfm pldl2keep,
+ call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 1, i32 0, i32 1)
+
+ ; CHECK: prfm pldl3keep,
+ call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 2, i32 0, i32 1)
+
+ ; CHECK: prfm pldslckeep,
+ call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
+
+ ; CHECK: prfm pldl1strm,
+ call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 1, i32 1)
+
+ ; CHECK: prfm plil1keep,
+ call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 0)
+
+ ret void
+}
+
+declare void @llvm.aarch64.prefetch(ptr readonly, i32 immarg, i32 immarg, i32 immarg, i32 immarg) #0
+
+attributes #0 = { inaccessiblemem_or_argmemonly nounwind willreturn }
+
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"any pointer", !1}
More information about the llvm-commits
mailing list