[clang] 04e5bc7 - [AArch64] Add support for range prefetch intrinsic (#170490)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 12 07:53:21 PST 2026
Author: Kerry McLaughlin
Date: 2026-01-12T15:53:17Z
New Revision: 04e5bc7dfbd0a5b4c00470ccea4cc43dcfc2d38a
URL: https://github.com/llvm/llvm-project/commit/04e5bc7dfbd0a5b4c00470ccea4cc43dcfc2d38a
DIFF: https://github.com/llvm/llvm-project/commit/04e5bc7dfbd0a5b4c00470ccea4cc43dcfc2d38a.diff
LOG: [AArch64] Add support for range prefetch intrinsic (#170490)
This patch adds support in Clang for the RPRFM instruction, by adding
the following intrinsics:
```
void __pldx_range(unsigned int access_kind, unsigned int retention_policy,
                  signed int length, unsigned int count, signed int stride,
                  size_t reuse_distance, void const *addr);
void __pld_range(unsigned int access_kind, unsigned int retention_policy,
                 uint64_t metadata, void const *addr);
```
The `__ARM_PREFETCH_RANGE` macro can be used to test whether these
intrinsics are implemented. If the RPRFM instruction is not available,
the instruction executes as a NOP.
This implements the following ACLE proposal:
https://github.com/ARM-software/acle/pull/423
Added:
llvm/test/CodeGen/AArch64/range-prefetch.ll
llvm/test/Verifier/AArch64/intrinsic-immarg.ll
Modified:
clang/include/clang/Basic/BuiltinsAArch64.def
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/CodeGen/TargetBuiltins/ARM.cpp
clang/lib/Headers/arm_acle.h
clang/lib/Sema/SemaARM.cpp
clang/test/CodeGen/arm_acle.c
clang/test/CodeGen/builtins-arm64.c
clang/test/Preprocessor/aarch64-target-features.c
clang/test/Preprocessor/init-aarch64.c
clang/test/Sema/builtins-arm64.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/IR/Verifier.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrGISel.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index adb6c941e852a..5ae5affb51fde 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -96,6 +96,10 @@ TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
// Prefetch
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
+// Range Prefetch
+BUILTIN(__builtin_arm_range_prefetch_x, "vvC*UiUiiUiiz", "n")
+BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiWUi", "n")
+
// System Registers
BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc")
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index ecd441be364c2..fe407e9fc1789 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -477,6 +477,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4");
+ // Clang supports range prefetch intrinsics
+ Builder.defineMacro("__ARM_PREFETCH_RANGE", "1");
+
if (FPU & NeonMode) {
Builder.defineMacro("__ARM_NEON", "1");
// 64-bit NEON supports half, single and double precision operations.
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 05dff01a71b9a..2d7128bf95df2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -2660,6 +2660,56 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
return Builder.CreateCall(F, { Metadata, ArgValue });
}
+static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
+ const CallExpr *E) {
+ CodeGen::CGBuilderTy &Builder = CGF.Builder;
+ CodeGen::CodeGenModule &CGM = CGF.CGM;
+ SmallVector<llvm::Value *, 4> Ops;
+
+ auto getIntArg = [&](unsigned ArgNo) {
+ Expr::EvalResult Result;
+ if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
+ llvm_unreachable("Expected constant argument to range prefetch.");
+ return Result.Val.getInt().getExtValue();
+ };
+
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
+
+ if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
+ auto Length = getIntArg(3);
+ auto Count = getIntArg(4) - 1;
+ auto Stride = getIntArg(5);
+ auto Distance = getIntArg(6);
+
+ // Map ReuseDistance given in bytes to four bits representing decreasing
+ // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
+ // are rounded up to the nearest power of 2, starting at 32KiB. Any value
+ // over the maximum is represented by 0 (distance not known).
+ if (Distance > 0) {
+ Distance = llvm::Log2_32_Ceil(Distance);
+ if (Distance < 15)
+ Distance = 15;
+ else if (Distance > 29)
+ Distance = 0;
+ else
+ Distance = 30 - Distance;
+ }
+
+ uint64_t Mask22 = (1ULL << 22) - 1;
+ uint64_t Mask16 = (1ULL << 16) - 1;
+ uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
+ ((Count & Mask16) << 22) | (Length & Mask22);
+
+ Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
+ } else
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
+
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
+ Ops);
+}
+
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
/// argument that specifies the vector type.
static bool HasExtraNeonArgument(unsigned BuiltinID) {
@@ -5447,6 +5497,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
}
+ if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
+ BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
+ return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
+
// Memory Tagging Extensions (MTE) Intrinsics
Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 97f63e8ecf71f..622e8f3d6aa7b 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -98,6 +98,12 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
+#define __pldx_range(access_kind, retention_policy, length, count, stride, \
+ reuse_distance, addr) \
+ __builtin_arm_range_prefetch_x(addr, access_kind, retention_policy, length, \
+ count, stride, reuse_distance)
+#define __pld_range(access_kind, retention_policy, metadata, addr) \
+ __builtin_arm_range_prefetch(addr, access_kind, retention_policy, metadata)
#endif
/* 7.6.2 Instruction prefetch */
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 81504b74c5e45..53e8c002a1962 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1122,6 +1122,19 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1);
}
+ if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x) {
+ return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 3, -2097152, 2097151) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 4, 1, 65536) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 5, -2097152, 2097151);
+ }
+
+ if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) {
+ return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1);
+ }
+
if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
BuiltinID == AArch64::BI__builtin_arm_rsr128 ||
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 0f539cba5c758..2606ad6dd2ec1 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -164,6 +164,28 @@ void test_pld() {
__pld(0);
}
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_PREFETCH_RANGE)
+
+// AArch64-LABEL: @test_pld_range(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 [[MD:%.*]])
+// AArch64-NEXT: ret void
+//
+void test_pld_range(uint64_t md) {
+ __pld_range(0, 1, md, 0);
+}
+
+// AArch64-LABEL: @test_pldx_range(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 -576460477427613697)
+// AArch64-NEXT: ret void
+//
+void test_pldx_range() {
+ __pldx_range(0, 1, 2097151, 65536, -2097152, 15, 0);
+}
+
+#endif
+
// AArch32-LABEL: @test_pldx(
// AArch32-NEXT: entry:
// AArch32-NEXT: call void @llvm.prefetch.p0(ptr null, i32 1, i32 3, i32 1)
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 86c2812434643..c1fd348371f38 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -62,6 +62,55 @@ void prefetch(void) {
// CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
}
+void range_prefetch(void) {
+ __builtin_arm_range_prefetch(0, 0, 0, 0); // pldkeep
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
+
+ __builtin_arm_range_prefetch(0, 0, 1, 0); // pldstrm
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 0)
+
+ __builtin_arm_range_prefetch(0, 1, 0, 0); // pstkeep
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i64 0)
+
+ __builtin_arm_range_prefetch(0, 1, 1, 0); // pststrm
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i64 0)
+}
+
+void range_prefetch_x(void) {
+ __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 0); // pldkeep
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
+ __builtin_arm_range_prefetch_x(0, 0, 1, 0, 1, 0, 0); // pldstrm
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 0)
+ __builtin_arm_range_prefetch_x(0, 1, 0, 0, 1, 0, 0); // pstkeep
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i64 0)
+ __builtin_arm_range_prefetch_x(0, 1, 1, 0, 1, 0, 0); // pststrm
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i64 0)
+
+ // Lower limits (length, count & stride)
+ __builtin_arm_range_prefetch_x(0, 0, 0, -2097152, 1, -2097152, 0);
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 576460752305520640)
+
+ // Upper limits (length, count & stride)
+ __builtin_arm_range_prefetch_x(0, 0, 0, 2097151, 65536, 2097151, 0);
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 576460752301326335)
+
+ // Distance less than minimum, round up to first power of two (1111)
+ __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 1);
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -1152921504606846976)
+
+ // Distance 1 over minimum, round up to next power of 2 (1110)
+ __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 32769);
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -2305843009213693952)
+
+ // Distance is a power of two in range (1010)
+ __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 1048576);
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -6917529027641081856)
+
+ // Distance is out of range, set to 0 (0000)
+ __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 536870913);
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
+}
+
__attribute__((target("v8.5a")))
int32_t jcvt(double v) {
//CHECK-LABEL: @jcvt(
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 4dd243e57a63e..137840f6d2864 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -41,6 +41,7 @@
// CHECK: __ARM_NEON_FP 0xE
// CHECK: __ARM_NEON_SVE_BRIDGE 1
// CHECK: __ARM_PCS_AAPCS64 1
+// CHECK: __ARM_PREFETCH_RANGE 1
// CHECK-NOT: __ARM_PCS 1
// CHECK-NOT: __ARM_PCS_VFP 1
// CHECK-NOT: __ARM_SIZEOF_MINIMAL_ENUM 1
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index 460778f39d003..09e3fc926a309 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -32,6 +32,7 @@
// AARCH64-NEXT: #define __ARM_FP16_FORMAT_IEEE 1
// AARCH64-NEXT: #define __ARM_NEON_SVE_BRIDGE 1
// AARCH64-NEXT: #define __ARM_PCS_AAPCS64 1
+// AARCH64-NEXT: #define __ARM_PREFETCH_RANGE 1
// AARCH64-NEXT: #define __ARM_SIZEOF_MINIMAL_ENUM 4
// AARCH64-NEXT: #define __ARM_SIZEOF_WCHAR_T 4
// AARCH64-NEXT: #define __ARM_STATE_ZA 1
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index f094162b3aadc..41cffd7ebb1a0 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -30,6 +30,19 @@ void test_prefetch(void) {
__builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
}
+void test_range_prefetch(void) {
+ __builtin_arm_range_prefetch(0, 2, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+ __builtin_arm_range_prefetch_x(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch_x(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch_x(0, 0, 0, -2097153, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch_x(0, 0, 0, 2097152, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch_x(0, 0, 0, 0, 65537, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch_x(0, 0, 0, 0, 0, -2097153, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch_x(0, 0, 0, 0, 0, 2097152, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
void test_trap(short s, unsigned short us) {
__builtin_arm_trap(42);
__builtin_arm_trap(65535);
@@ -37,4 +50,4 @@ void test_trap(short s, unsigned short us) {
__builtin_arm_trap(65536); // expected-warning {{implicit conversion from 'int' to 'unsigned short' changes value from 65536 to 0}}
__builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
__builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
-}
\ No newline at end of file
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 221eca5b18d01..fd56e0e3f9e7b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -76,6 +76,12 @@ def int_aarch64_prefetch : Intrinsic<[],
]>,
ClangBuiltin<"__builtin_arm_prefetch">;
+def int_aarch64_range_prefetch : Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>,
+ ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>,
+ ClangBuiltin<"__builtin_arm_range_prefetch">;
+
//===----------------------------------------------------------------------===//
// Data Barrier Instructions
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 0b9a23f217218..bb552861130d2 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6768,6 +6768,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
break;
}
+ case Intrinsic::aarch64_range_prefetch: {
+ Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
+ "write argument to llvm.aarch64.range.prefetch must be 0 or 1", Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 2,
+ "stream argument to llvm.aarch64.range.prefetch must be 0 or 1",
+ Call);
+ break;
+ }
case Intrinsic::callbr_landingpad: {
const auto *CBR = dyn_cast<CallBrInst>(Call.getOperand(0));
Check(CBR, "intrinstic requires callbr operand", &Call);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ffd9641e9f9df..74ee8ff8ab5f5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6229,6 +6229,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
}
+ case Intrinsic::aarch64_range_prefetch: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Addr = Op.getOperand(2);
+
+ unsigned IsWrite = Op.getConstantOperandVal(3);
+ unsigned IsStream = Op.getConstantOperandVal(4);
+ unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+ SDValue Metadata = Op.getOperand(5);
+ return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain,
+ DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr,
+ Metadata);
+ }
case Intrinsic::aarch64_sme_str:
case Intrinsic::aarch64_sme_ldr: {
return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 497306dabaa97..d0c08036e7d41 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -192,6 +192,12 @@ def G_AARCH64_PREFETCH : AArch64GenericInstruction {
let hasSideEffects = 1;
}
+def G_AARCH64_RANGE_PREFETCH : AArch64GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$imm, ptype0:$src1, type1:$src2);
+ let hasSideEffects = 1;
+}
+
def G_UMULL : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
@@ -339,6 +345,7 @@ def : GINodeEquiv<G_SRI, AArch64vsri>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
+def : GINodeEquiv<G_AARCH64_RANGE_PREFETCH, AArch64RangePrefetch>;
def : GINodeEquiv<G_FPTRUNC_ODD, AArch64fcvtxn_n>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 5a471824ef513..cb8f7c3d70afc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -538,6 +538,7 @@ def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
+def SDT_AArch64RANGE_PREFETCH: SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i64>]>;
def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
@@ -1054,6 +1055,10 @@ def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
[SDNPHasChain, SDNPSideEffect]>;
+def AArch64RangePrefetch: SDNode<"AArch64ISD::RANGE_PREFETCH",
+ SDT_AArch64RANGE_PREFETCH,
+ [SDNPHasChain, SDNPSideEffect]>;
+
// {s|u}int to FP within a FP register.
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
@@ -11089,6 +11094,9 @@ def RPRFM:
let DecoderNamespace = "Fallback";
}
+def : Pat<(AArch64RangePrefetch rprfop:$Rt, GPR64sp:$Rn, GPR64:$Rm),
+ (RPRFM rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn)>;
+
//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1ae0b99416a29..e067489283b24 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1755,6 +1755,20 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_range_prefetch: {
+ auto &AddrVal = MI.getOperand(1);
+
+ int64_t IsWrite = MI.getOperand(2).getImm();
+ int64_t IsStream = MI.getOperand(3).getImm();
+ unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+ MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
+ .addImm(PrfOp)
+ .add(AddrVal)
+ .addUse(MI.getOperand(4).getReg()); // Metadata
+ MI.eraseFromParent();
+ return true;
+ }
case Intrinsic::aarch64_neon_uaddv:
case Intrinsic::aarch64_neon_saddv:
case Intrinsic::aarch64_neon_umaxv:
diff --git a/llvm/test/CodeGen/AArch64/range-prefetch.ll b/llvm/test/CodeGen/AArch64/range-prefetch.ll
new file mode 100644
index 0000000000000..bc01498296cf3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/range-prefetch.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s
+
+define void @range_prefetch_metadata_accesses(ptr %a, i64 %metadata) {
+; CHECK-LABEL: range_prefetch_metadata_accesses:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rprfm pldkeep, x1, [x0]
+; CHECK-NEXT: rprfm pstkeep, x1, [x0]
+; CHECK-NEXT: rprfm pldstrm, x1, [x0]
+; CHECK-NEXT: rprfm pststrm, x1, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i64 %metadata)
+ call void @llvm.aarch64.range.prefetch(ptr %a, i32 1, i32 0, i64 %metadata)
+ call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i64 %metadata)
+ call void @llvm.aarch64.range.prefetch(ptr %a, i32 1, i32 1, i64 %metadata)
+ ret void
+}
diff --git a/llvm/test/Verifier/AArch64/intrinsic-immarg.ll b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll
new file mode 100644
index 0000000000000..e17c11d66dac4
--- /dev/null
+++ b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll
@@ -0,0 +1,13 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @range_prefetch(ptr %src, i64 %metadata) {
+ ; CHECK: write argument to llvm.aarch64.range.prefetch must be 0 or 1
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i64 %metadata)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i64 %metadata)
+
+ ; CHECK-NEXT: stream argument to llvm.aarch64.range.prefetch must be 0 or 1
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i64 %metadata)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i64 %metadata)
+
+ ret void
+}
More information about the cfe-commits
mailing list