[llvm] [AArch64] Allow register offset addressing mode for prefetch (PR #124534)
Csanád Hajdú via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 04:09:28 PST 2025
https://github.com/Il-Capitano created https://github.com/llvm/llvm-project/pull/124534
Previously instruction selection failed to generate PRFM instructions with register offsets because `AArch64ISD::PREFETCH` is not a `MemSDNode`.
From 01f6fd5fcb15db02b2cd2a010f08b8257269d36a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Csan=C3=A1d=20Hajd=C3=BA?= <csanad.hajdu at arm.com>
Date: Mon, 27 Jan 2025 12:59:22 +0100
Subject: [PATCH] [AArch64] Allow register offset addressing mode for prefetch
Previously instruction selection failed to generate PRFM instructions
with register offsets because `AArch64ISD::PREFETCH` is not a
`MemSDNode`.
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 12 +-
.../AArch64/arm64-prefetch-addrmode.ll | 147 ++++++++++++++++++
2 files changed, 155 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 6aa8cd4f0232ac..1387a224fa660e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -665,6 +665,10 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
+static bool isMemOpOrPrefetch(SDNode *N) {
+ return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
+}
+
/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
@@ -682,9 +686,9 @@ static bool isWorthFoldingSHL(SDValue V) {
// computation, since the computation will be kept.
const SDNode *Node = V.getNode();
for (SDNode *UI : Node->users())
- if (!isa<MemSDNode>(*UI))
+ if (!isMemOpOrPrefetch(UI))
for (SDNode *UII : UI->users())
- if (!isa<MemSDNode>(*UII))
+ if (!isMemOpOrPrefetch(UII))
return false;
return true;
}
@@ -1248,7 +1252,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
// computation, since the computation will be kept.
const SDNode *Node = N.getNode();
for (SDNode *UI : Node->users()) {
- if (!isa<MemSDNode>(*UI))
+ if (!isMemOpOrPrefetch(UI))
return false;
}
@@ -1332,7 +1336,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
// computation, since the computation will be kept.
const SDNode *Node = N.getNode();
for (SDNode *UI : Node->users()) {
- if (!isa<MemSDNode>(*UI))
+ if (!isMemOpOrPrefetch(UI))
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
new file mode 100644
index 00000000000000..44202ffba6374b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define void @imm9(ptr %object) {
+; CHECK-LABEL: imm9:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfum pldl1keep, [x0, #7]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 7
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm9_max(ptr %object) {
+; CHECK-LABEL: imm9_max:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfum pldl1keep, [x0, #255]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 255
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm9_above_max(ptr %object) {
+; CHECK-LABEL: imm9_above_max:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, #257
+; CHECK-NEXT: prfm pldl1keep, [x8]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 257 ; 256 would use the imm12 mode
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm9_min(ptr %object) {
+; CHECK-LABEL: imm9_min:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfum pldl1keep, [x0, #-256]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -256
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm9_below_min(ptr %object) {
+; CHECK-LABEL: imm9_below_min:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub x8, x0, #257
+; CHECK-NEXT: prfm pldl1keep, [x8]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -257
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm12(ptr %object) {
+; CHECK-LABEL: imm12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, #8]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 1
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm12_max(ptr %object) {
+; CHECK-LABEL: imm12_max:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, #32760]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4095
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm12_above_max(ptr %object) {
+; CHECK-LABEL: imm12_above_max:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32768 // =0x8000
+; CHECK-NEXT: prfm pldl1keep, [x0, x8]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4096
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg(ptr %object, i64 %a) {
+; CHECK-LABEL: reg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, x1]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_shift(ptr %object, i64 %a) {
+; CHECK-LABEL: reg_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_sext(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_sext:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, w1, sxtw]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i32 %a
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_sext_shift(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_sext_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, w1, sxtw #3]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i32 %a
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_zext(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_zext:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, w1, uxtw]
+; CHECK-NEXT: ret
+ %a.zext = zext i32 %a to i64
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a.zext
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_zext_shift(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_zext_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, w1, uxtw #3]
+; CHECK-NEXT: ret
+ %a.zext = zext i32 %a to i64
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a.zext
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
More information about the llvm-commits mailing list