[clang] [llvm] [AArch64] Add support for range prefetch intrinsic (PR #170490)
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 05:58:20 PST 2025
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/170490
>From b2a59f99a05563c819b720ea56d806beb3f13d79 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 25 Nov 2025 14:21:21 +0000
Subject: [PATCH 1/2] [AArch64] Add support for range prefetch intrinsic
This patch adds support in Clang for the RPRFM instruction, which is
available when FEAT_RPRFM is defined:
void __rpld(int64_t access_kind, uint64_t retention_policy,
uint64_t reuse_distance, int64_t stride,
uint64_t count, int64_t length, void const *addr);
If FEAT_RPRFM is not available, this instruction is a NOP.
This implements the following ACLE proposal:
https://github.com/ARM-software/acle/pull/423
---
clang/include/clang/Basic/BuiltinsAArch64.def | 3 ++
clang/lib/Basic/Targets/AArch64.cpp | 7 +++++
clang/lib/Basic/Targets/AArch64.h | 1 +
clang/lib/Headers/arm_acle.h | 4 +++
clang/lib/Sema/SemaARM.cpp | 9 ++++++
clang/test/CodeGen/arm_acle.c | 13 +++++++++
clang/test/CodeGen/builtins-arm64.c | 14 +++++++++
.../print-supported-extensions-aarch64.c | 1 +
.../Preprocessor/aarch64-target-features.c | 3 ++
clang/test/Sema/builtins-arm64.c | 13 ++++++++-
llvm/include/llvm/IR/IntrinsicsAArch64.td | 8 +++++
llvm/lib/IR/Verifier.cpp | 21 ++++++++++++++
llvm/lib/Target/AArch64/AArch64Features.td | 3 ++
.../Target/AArch64/AArch64ISelLowering.cpp | 23 +++++++++++++++
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 +++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 10 +++++++
.../AArch64/AsmParser/AArch64AsmParser.cpp | 1 +
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 29 ++++++++++++++++++-
llvm/test/CodeGen/AArch64/range-prefetch.ll | 28 ++++++++++++++++++
19 files changed, 196 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/range-prefetch.ll
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index adb6c941e852a..7bbf747d705c7 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -96,6 +96,9 @@ TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
// Prefetch
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
+// Range Prefetch
+BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiUiiUii", "nc")
+
// System Registers
BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc")
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index d7f36c0f9b79a..38018953a269e 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -612,6 +612,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasLSE)
Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1");
+ if (HasRPRFM)
+ Builder.defineMacro("__ARM_FEATURE_RPRFM", "1");
+
if (HasBFloat16) {
Builder.defineMacro("__ARM_FEATURE_BF16", "1");
Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1");
@@ -870,6 +873,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
.Case("ssve-fp8fma", HasSSVE_FP8FMA)
.Case("sme-f8f32", HasSME_F8F32)
.Case("sme-f8f16", HasSME_F8F16)
+ .Case("rprfm", HasRPRFM)
.Default(false);
}
@@ -1100,6 +1104,9 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
if (Feature == "+strict-align")
HasUnalignedAccess = false;
+ if (Feature == "+rprfm")
+ HasRPRFM = true;
+
// All predecessor archs are added but select the latest one for ArchKind.
if (Feature == "+v8a" && ArchInfo->Version < llvm::AArch64::ARMV8A.Version)
ArchInfo = &llvm::AArch64::ARMV8A;
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 1a7aa658e9d87..866a9cb2c2711 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -131,6 +131,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
bool HasRCPC3 = false;
bool HasSMEFA64 = false;
bool HasPAuthLR = false;
+ bool HasRPRFM = false;
const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A;
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 97f63e8ecf71f..4b6cd97be602a 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -98,6 +98,10 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
+#define __rpld(access_kind, retention_policy, reuse_distance, stride, count, \
+ length, addr) \
+ __builtin_arm_range_prefetch(addr, access_kind, retention_policy, \
+ reuse_distance, stride, count, length)
#endif
/* 7.6.2 Instruction prefetch */
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index a5164a94b57fa..da3438fb77118 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1122,6 +1122,15 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1);
}
+ if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) {
+ return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 15) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 4, -2048, 2040) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 5, 0, 65535) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 6, -2048, 2040);
+ }
+
if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
BuiltinID == AArch64::BI__builtin_arm_rsr128 ||
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 0f539cba5c758..1f1c8b82c0ae1 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -164,6 +164,19 @@ void test_pld() {
__pld(0);
}
+#if defined(__ARM_64BIT_STATE)
+
+// AArch64-LABEL: @test_rpld(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65535, i32 2040)
+// AArch64-NEXT: ret void
+//
+void test_rpld() {
+ __rpld(1, 1, 15, -2048, 65535, 2040, 0);
+}
+
+#endif
+
// AArch32-LABEL: @test_pldx(
// AArch32-NEXT: entry:
// AArch32-NEXT: call void @llvm.prefetch.p0(ptr null, i32 1, i32 3, i32 1)
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 86c2812434643..1262823bf6ed3 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -62,6 +62,20 @@ void prefetch(void) {
// CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
}
+void range_prefetch(void) {
+ __builtin_arm_range_prefetch(0, 0, 0, 15, 1024, 24, 2); // pldkeep
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i32 15, i32 1024, i32 24, i32 2)
+
+ __builtin_arm_range_prefetch(0, 0, 1, 15, 1024, 24, 2); // pldstrm
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i32 15, i32 1024, i32 24, i32 2)
+
+ __builtin_arm_range_prefetch(0, 1, 0, 15, 1024, 24, 2); // pstkeep
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i32 15, i32 1024, i32 24, i32 2)
+
+ __builtin_arm_range_prefetch(0, 1, 1, 15, 1024, 24, 2); // pststrm
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 1024, i32 24, i32 2)
+}
+
__attribute__((target("v8.5a")))
int32_t jcvt(double v) {
//CHECK-LABEL: @jcvt(
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 1f8929e705e4c..1a34478f11c6b 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -69,6 +69,7 @@
// CHECK-NEXT: rcpc FEAT_LRCPC Enable support for RCPC extension
// CHECK-NEXT: rcpc3 FEAT_LRCPC3 Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set
// CHECK-NEXT: rdm FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions
+// CHECK-NEXT: rprfm FEAT_RPRFM Enable Armv8.0-A Range Prefetch Memory instruction
// CHECK-NEXT: sb FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: sha2 FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support
// CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 4dd243e57a63e..d5d78f1118a4f 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -789,3 +789,6 @@
// CHECK-SMEF8F16: __ARM_FEATURE_FP8 1
// CHECK-SMEF8F16: __ARM_FEATURE_SME2 1
// CHECK-SMEF8F16: __ARM_FEATURE_SME_F8F16 1
+
+// RUN: %clang --target=aarch64 -march=armv8-a+rprfm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-RPRFM %s
+// CHECK-RPRFM: __ARM_FEATURE_RPRFM 1
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index f094162b3aadc..3d26b16d461d0 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -30,6 +30,17 @@ void test_prefetch(void) {
__builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
}
+void test_range_prefetch(void) {
+ __builtin_arm_range_prefetch(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 16, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, -2049, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 2041, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65536, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, -2049); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, 2041); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
void test_trap(short s, unsigned short us) {
__builtin_arm_trap(42);
__builtin_arm_trap(65535);
@@ -37,4 +48,4 @@ void test_trap(short s, unsigned short us) {
__builtin_arm_trap(65536); // expected-warning {{implicit conversion from 'int' to 'unsigned short' changes value from 65536 to 0}}
__builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
__builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
-}
\ No newline at end of file
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 1c86c6815f049..43a7f10ce2618 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -76,6 +76,14 @@ def int_aarch64_prefetch : Intrinsic<[],
]>,
ClangBuiltin<"__builtin_arm_prefetch">;
+def int_aarch64_range_prefetch : Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>,
+ ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
+ ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>]>,
+ ClangBuiltin<"__builtin_arm_range_prefetch">;
+
//===----------------------------------------------------------------------===//
// Data Barrier Instructions
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index a1e14d8f25bf7..17c4bfd67b4c0 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6701,6 +6701,27 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
break;
}
+ case Intrinsic::aarch64_range_prefetch: {
+ Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
+ "write argument to llvm.aarch64.range.prefetch must be 0 or 1", Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 2,
+ "stream argument to llvm.aarch64.range.prefetch must be 0 or 1",
+ Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 16,
+ "reuse distance argument to llvm.aarch64.range.prefetch must be < 16",
+ Call);
+ int Stride = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
+ Check(Stride > -2049 && Stride < 2041,
+ "stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040",
+ Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue() < 65536,
+ "count argument to llvm.aarch64.range.prefetch must be < 65536");
+ int Length = cast<ConstantInt>(Call.getArgOperand(6))->getZExtValue();
+ Check(Length > -2049 && Length < 2041,
+ "length argument to llvm.aarch64.range.prefetch must be -2048 -"
+ "2040");
+ break;
+ }
case Intrinsic::callbr_landingpad: {
const auto *CBR = dyn_cast<CallBrInst>(Call.getOperand(0));
Check(CBR, "intrinstic requires callbr operand", &Call);
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 066724bea92c9..e643bdf6fea74 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -101,6 +101,9 @@ def FeaturePerfMon : ExtensionWithMArch<"perfmon", "PerfMon", "FEAT_PMUv3",
def FeatureSpecRestrict : Extension<"specrestrict", "SpecRestrict", "FEAT_CSV2_2",
"Enable architectural speculation restriction">;
+def FeatureRPRFM : ExtensionWithMArch<"rprfm", "RPRFM", "FEAT_RPRFM",
+ "Enable Armv8.0-A Range Prefetch Memory instruction">;
+
//===----------------------------------------------------------------------===//
// Armv8.1 Architecture Extensions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2ce8f6d924a78..dd6248afe9358 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6161,6 +6161,29 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
}
+ case Intrinsic::aarch64_range_prefetch: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Addr = Op.getOperand(2);
+
+ unsigned IsWrite = Op.getConstantOperandVal(3);
+ unsigned IsStream = Op.getConstantOperandVal(4);
+ unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+ uint64_t Distance = Op.getConstantOperandVal(5);
+ int64_t Stride = Op.getConstantOperandVal(6);
+ uint64_t Count = Op.getConstantOperandVal(7);
+ int64_t Length = Op.getConstantOperandVal(8);
+ uint64_t Mask22 = (1ULL << 22) - 1;
+ uint64_t Mask16 = (1ULL << 16) - 1;
+ uint64_t Metadata = (Distance << 60) |
+ ((Stride & Mask22) << 38) |
+ ((Count & Mask16) << 22) |
+ (Length & Mask22);
+
+ return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain,
+ DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr,
+ DAG.getConstant(Metadata, DL, MVT::i64));
+ }
case Intrinsic::aarch64_sme_str:
case Intrinsic::aarch64_sme_ldr: {
return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7d99786830e3d..c40a9e34b37a2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -192,6 +192,12 @@ def G_AARCH64_PREFETCH : AArch64GenericInstruction {
let hasSideEffects = 1;
}
+def G_AARCH64_RANGE_PREFETCH : AArch64GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$imm, ptype0:$src1, type1:$src2);
+ let hasSideEffects = 1;
+}
+
def G_UMULL : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
@@ -303,6 +309,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
+def : GINodeEquiv<G_AARCH64_RANGE_PREFETCH, AArch64RangePrefetch>;
def : GINodeEquiv<G_FPTRUNC_ODD, AArch64fcvtxn_n>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index da93a2b13fc11..0007ddba3d941 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -411,6 +411,8 @@ def HasS1POE2 : Predicate<"Subtarget->hasS1POE2()">,
AssemblerPredicateWithAll<(all_of FeatureS1POE2), "poe2">;
def HasTEV : Predicate<"Subtarget->hasTEV()">,
AssemblerPredicateWithAll<(all_of FeatureTEV), "tev">;
+def HasRPRFM : Predicate<"Subtarget->hasRPRFM()">,
+ AssemblerPredicateWithAll<(all_of FeatureRPRFM), "rprfm">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -536,6 +538,7 @@ def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
+def SDT_AArch64RANGE_PREFETCH: SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
@@ -1038,6 +1041,10 @@ def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
[SDNPHasChain, SDNPSideEffect]>;
+def AArch64RangePrefetch: SDNode<"AArch64ISD::RANGE_PREFETCH",
+ SDT_AArch64RANGE_PREFETCH,
+ [SDNPHasChain, SDNPSideEffect]>;
+
// {s|u}int to FP within a FP register.
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
@@ -10980,6 +10987,9 @@ def RPRFM:
let DecoderNamespace = "Fallback";
}
+def : Pat<(AArch64RangePrefetch rprfop:$Rt, GPR64sp:$Rn, GPR64:$Rm),
+ (RPRFM rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn)>;
+
//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 433cb0387c470..09075d7fac90a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3954,6 +3954,7 @@ static const struct Extension {
{"poe2", {AArch64::FeatureS1POE2}},
{"tev", {AArch64::FeatureTEV}},
{"btie", {AArch64::FeatureBTIE}},
+ {"rprfm", {AArch64::FeatureRPRFM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1025b2502211a..dad362785ba3f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1749,6 +1749,33 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_range_prefetch: {
+ auto &AddrVal = MI.getOperand(1);
+
+ int64_t IsWrite = MI.getOperand(2).getImm();
+ int64_t IsStream = MI.getOperand(3).getImm();
+ unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+ int64_t Distance = MI.getOperand(4).getImm();
+ int64_t Stride = MI.getOperand(5).getImm();
+ int64_t Count = MI.getOperand(6).getImm();
+ int64_t Length = MI.getOperand(7).getImm();
+ uint64_t Mask22 = (1ULL << 22) - 1;
+ uint64_t Mask16 = (1ULL << 16) - 1;
+ uint64_t Metadata = (Distance << 60) |
+ ((Stride & Mask22) << 38) |
+ ((Count & Mask16) << 22) |
+ (Length & Mask22);
+
+ auto MetadataReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MIB.buildConstant(MetadataReg, Metadata);
+ MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
+ .addImm(PrfOp)
+ .add(AddrVal)
+ .addUse(MetadataReg);
+ MI.eraseFromParent();
+ return true;
+ }
case Intrinsic::aarch64_neon_uaddv:
case Intrinsic::aarch64_neon_saddv:
case Intrinsic::aarch64_neon_umaxv:
@@ -2506,4 +2533,4 @@ bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
MRI.replaceRegWith(Dst, Fin);
MI.eraseFromParent();
return true;
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/AArch64/range-prefetch.ll b/llvm/test/CodeGen/AArch64/range-prefetch.ll
new file mode 100644
index 0000000000000..a010346d58979
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/range-prefetch.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a -mattr=+rprfm --global-isel=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a -mattr=+rprfm --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s
+
+define void @test(ptr %a, ptr %b) {
+; CHECK-LABEL: test:
+; CHECK: mov x8, #-562675075514369
+; CHECK-NEXT: mov x9, #4192256
+; CHECK-NEXT: movk x8, #2040
+; CHECK-NEXT: orr x9, x9, #0x1fe0000000000
+; CHECK-NEXT: movk x8, #65472, lsl #16
+
+; CHECK-NEXT: rprfm pldkeep, x8, [x0]
+ call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i32 15, i32 -2048, i32 65535, i32 2040)
+
+; CHECK-NEXT: rprfm pstkeep, x8, [x1]
+ call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 0, i32 15, i32 -2048, i32 65535, i32 2040)
+
+; CHECK-NEXT: rprfm pldstrm, x9, [x0]
+ call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i32 0, i32 2040, i32 0, i32 -2048)
+
+; CHECK-NEXT: rprfm pststrm, x9, [x1]
+ call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 1, i32 0, i32 2040, i32 0, i32 -2048)
+
+ ret void
+}
>From ed82848f29f19720c19ddbd09b57dd135dc5a43a Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Mon, 8 Dec 2025 11:49:25 +0000
Subject: [PATCH 2/2] - Change range of Count argument (1 to 65535) and
subtract 1 during lowering - Add Verifier tests - Run clang-format
---
clang/lib/Sema/SemaARM.cpp | 2 +-
clang/test/CodeGen/arm_acle.c | 4 +-
clang/test/Sema/builtins-arm64.c | 2 +-
llvm/lib/IR/Verifier.cpp | 11 +++--
.../Target/AArch64/AArch64ISelLowering.cpp | 8 ++--
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 8 ++--
llvm/test/CodeGen/AArch64/range-prefetch.ll | 8 ++--
.../test/Verifier/AArch64/intrinsic-immarg.ll | 41 +++++++++++++++++++
8 files changed, 62 insertions(+), 22 deletions(-)
create mode 100644 llvm/test/Verifier/AArch64/intrinsic-immarg.ll
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index da3438fb77118..2f565773c43ae 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1127,7 +1127,7 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) ||
SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 15) ||
SemaRef.BuiltinConstantArgRange(TheCall, 4, -2048, 2040) ||
- SemaRef.BuiltinConstantArgRange(TheCall, 5, 0, 65535) ||
+ SemaRef.BuiltinConstantArgRange(TheCall, 5, 1, 65536) ||
SemaRef.BuiltinConstantArgRange(TheCall, 6, -2048, 2040);
}
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 1f1c8b82c0ae1..dd0713e69666f 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -168,11 +168,11 @@ void test_pld() {
// AArch64-LABEL: @test_rpld(
// AArch64-NEXT: entry:
-// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65535, i32 2040)
+// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65536, i32 2040)
// AArch64-NEXT: ret void
//
void test_rpld() {
- __rpld(1, 1, 15, -2048, 65535, 2040, 0);
+ __rpld(1, 1, 15, -2048, 65536, 2040, 0);
}
#endif
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index 3d26b16d461d0..441f91dc4e194 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -36,7 +36,7 @@ void test_range_prefetch(void) {
__builtin_arm_range_prefetch(0, 0, 0, 16, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
__builtin_arm_range_prefetch(0, 0, 0, 0, -2049, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
__builtin_arm_range_prefetch(0, 0, 0, 0, 2041, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
- __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65536, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65537, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
__builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, -2049); // expected-error-re {{argument value {{.*}} is outside the valid range}}
__builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, 2041); // expected-error-re {{argument value {{.*}} is outside the valid range}}
}
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 17c4bfd67b4c0..ae7da03f82025 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6714,12 +6714,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Check(Stride > -2049 && Stride < 2041,
"stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040",
Call);
- Check(cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue() < 65536,
- "count argument to llvm.aarch64.range.prefetch must be < 65536");
+ int Count = cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue();
+ Check(Count > 0 && Count < 65537,
+ "count argument to llvm.aarch64.range.prefetch must be < 65537",
+ Call);
int Length = cast<ConstantInt>(Call.getArgOperand(6))->getZExtValue();
Check(Length > -2049 && Length < 2041,
- "length argument to llvm.aarch64.range.prefetch must be -2048 -"
- "2040");
+ "length argument to llvm.aarch64.range.prefetch must be -2048 - "
+ "2040",
+ Call);
break;
}
case Intrinsic::callbr_landingpad: {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dd6248afe9358..b7813695facfa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6171,14 +6171,12 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
uint64_t Distance = Op.getConstantOperandVal(5);
int64_t Stride = Op.getConstantOperandVal(6);
- uint64_t Count = Op.getConstantOperandVal(7);
+ uint64_t Count = Op.getConstantOperandVal(7) - 1;
int64_t Length = Op.getConstantOperandVal(8);
uint64_t Mask22 = (1ULL << 22) - 1;
uint64_t Mask16 = (1ULL << 16) - 1;
- uint64_t Metadata = (Distance << 60) |
- ((Stride & Mask22) << 38) |
- ((Count & Mask16) << 22) |
- (Length & Mask22);
+ uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
+ ((Count & Mask16) << 22) | (Length & Mask22);
return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain,
DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index dad362785ba3f..9ed8ed5c53e2f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1758,14 +1758,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
int64_t Distance = MI.getOperand(4).getImm();
int64_t Stride = MI.getOperand(5).getImm();
- int64_t Count = MI.getOperand(6).getImm();
+ int64_t Count = MI.getOperand(6).getImm() - 1;
int64_t Length = MI.getOperand(7).getImm();
uint64_t Mask22 = (1ULL << 22) - 1;
uint64_t Mask16 = (1ULL << 16) - 1;
- uint64_t Metadata = (Distance << 60) |
- ((Stride & Mask22) << 38) |
- ((Count & Mask16) << 22) |
- (Length & Mask22);
+ uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
+ ((Count & Mask16) << 22) | (Length & Mask22);
auto MetadataReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
MIB.buildConstant(MetadataReg, Metadata);
diff --git a/llvm/test/CodeGen/AArch64/range-prefetch.ll b/llvm/test/CodeGen/AArch64/range-prefetch.ll
index a010346d58979..973a4e8684c48 100644
--- a/llvm/test/CodeGen/AArch64/range-prefetch.ll
+++ b/llvm/test/CodeGen/AArch64/range-prefetch.ll
@@ -13,16 +13,16 @@ define void @test(ptr %a, ptr %b) {
; CHECK-NEXT: movk x8, #65472, lsl #16
; CHECK-NEXT: rprfm pldkeep, x8, [x0]
- call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i32 15, i32 -2048, i32 65535, i32 2040)
+ call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i32 15, i32 -2048, i32 65536, i32 2040)
; CHECK-NEXT: rprfm pstkeep, x8, [x1]
- call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 0, i32 15, i32 -2048, i32 65535, i32 2040)
+ call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 0, i32 15, i32 -2048, i32 65536, i32 2040)
; CHECK-NEXT: rprfm pldstrm, x9, [x0]
- call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i32 0, i32 2040, i32 0, i32 -2048)
+ call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i32 0, i32 2040, i32 1, i32 -2048)
; CHECK-NEXT: rprfm pststrm, x9, [x1]
- call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 1, i32 0, i32 2040, i32 0, i32 -2048)
+ call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 1, i32 0, i32 2040, i32 1, i32 -2048)
ret void
}
diff --git a/llvm/test/Verifier/AArch64/intrinsic-immarg.ll b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll
new file mode 100644
index 0000000000000..cbaf285efc156
--- /dev/null
+++ b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll
@@ -0,0 +1,41 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @range_prefetch(ptr %src) {
+ ; CHECK: write argument to llvm.aarch64.range.prefetch must be 0 or 1
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i32 0, i32 0, i32 1, i32 0)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i32 0, i32 0, i32 1, i32 0)
+
+ ; CHECK-NEXT: stream argument to llvm.aarch64.range.prefetch must be 0 or 1
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i32 0, i32 0, i32 1, i32 0)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i32 0, i32 0, i32 1, i32 0)
+
+ ; CHECK-NEXT: reuse distance argument to llvm.aarch64.range.prefetch must be < 16
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 16, i32 0, i32 1, i32 0)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 16, i32 0, i32 1, i32 0)
+
+ ; CHECK-NEXT: stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 -2049, i32 1, i32 0)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 -2049, i32 1, i32 0)
+
+ ; CHECK-NEXT: stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 2041, i32 1, i32 0)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 2041, i32 1, i32 0)
+
+ ; CHECK-NEXT: count argument to llvm.aarch64.range.prefetch must be < 65537
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+
+ ; CHECK-NEXT: count argument to llvm.aarch64.range.prefetch must be < 65537
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 65537, i32 0)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 65537, i32 0)
+
+ ; CHECK-NEXT: length argument to llvm.aarch64.range.prefetch must be -2048 - 2040
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 -2049)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 -2049)
+
+ ; CHECK-NEXT: length argument to llvm.aarch64.range.prefetch must be -2048 - 2040
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2041)
+ call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2041)
+
+ ret void
+}
More information about the llvm-commits
mailing list