[llvm] [AArch64] Allow register offset addressing mode for prefetch (PR #124534)

Csanád Hajdú via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 27 04:09:28 PST 2025


https://github.com/Il-Capitano created https://github.com/llvm/llvm-project/pull/124534

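Previously, instruction selection failed to generate PRFM instructions with register offsets: the register-offset address selectors only fold an address computation when every user of it is a `MemSDNode`, and `AArch64ISD::PREFETCH` is not a `MemSDNode`.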

>From 01f6fd5fcb15db02b2cd2a010f08b8257269d36a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Csan=C3=A1d=20Hajd=C3=BA?= <csanad.hajdu at arm.com>
Date: Mon, 27 Jan 2025 12:59:22 +0100
Subject: [PATCH] [AArch64] Allow register offset addressing mode for prefetch

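Previously, instruction selection failed to generate PRFM instructions
with register offsets: the register-offset address selectors only fold
an address computation when every user of it is a `MemSDNode`, and
`AArch64ISD::PREFETCH` is not a `MemSDNode`.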
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |  12 +-
 .../AArch64/arm64-prefetch-addrmode.ll        | 147 ++++++++++++++++++
 2 files changed, 155 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 6aa8cd4f0232ac..1387a224fa660e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -665,6 +665,10 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
   }
 }
 
+static bool isMemOpOrPrefetch(SDNode *N) {
+  return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
+}
+
 /// Determine whether it is worth it to fold SHL into the addressing
 /// mode.
 static bool isWorthFoldingSHL(SDValue V) {
@@ -682,9 +686,9 @@ static bool isWorthFoldingSHL(SDValue V) {
   // computation, since the computation will be kept.
   const SDNode *Node = V.getNode();
   for (SDNode *UI : Node->users())
-    if (!isa<MemSDNode>(*UI))
+    if (!isMemOpOrPrefetch(UI))
       for (SDNode *UII : UI->users())
-        if (!isa<MemSDNode>(*UII))
+        if (!isMemOpOrPrefetch(UII))
           return false;
   return true;
 }
@@ -1248,7 +1252,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
   // computation, since the computation will be kept.
   const SDNode *Node = N.getNode();
   for (SDNode *UI : Node->users()) {
-    if (!isa<MemSDNode>(*UI))
+    if (!isMemOpOrPrefetch(UI))
       return false;
   }
 
@@ -1332,7 +1336,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
   // computation, since the computation will be kept.
   const SDNode *Node = N.getNode();
   for (SDNode *UI : Node->users()) {
-    if (!isa<MemSDNode>(*UI))
+    if (!isMemOpOrPrefetch(UI))
       return false;
   }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
new file mode 100644
index 00000000000000..44202ffba6374b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define void @imm9(ptr %object) {
+; CHECK-LABEL: imm9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfum pldl1keep, [x0, #7]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 7
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm9_max(ptr %object) {
+; CHECK-LABEL: imm9_max:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfum pldl1keep, [x0, #255]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 255
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm9_above_max(ptr %object) {
+; CHECK-LABEL: imm9_above_max:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #257
+; CHECK-NEXT:    prfm pldl1keep, [x8]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 257  ; 256 would use the imm12 mode
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm9_min(ptr %object) {
+; CHECK-LABEL: imm9_min:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfum pldl1keep, [x0, #-256]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -256
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm9_below_min(ptr %object) {
+; CHECK-LABEL: imm9_below_min:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x0, #257
+; CHECK-NEXT:    prfm pldl1keep, [x8]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -257
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm12(ptr %object) {
+; CHECK-LABEL: imm12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, #8]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 1
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm12_max(ptr %object) {
+; CHECK-LABEL: imm12_max:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, #32760]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4095
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm12_above_max(ptr %object) {
+; CHECK-LABEL: imm12_above_max:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    prfm pldl1keep, [x0, x8]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4096
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg(ptr %object, i64 %a) {
+; CHECK-LABEL: reg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, x1]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_shift(ptr %object, i64 %a) {
+; CHECK-LABEL: reg_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_sext(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, w1, sxtw]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i32 %a
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_sext_shift(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_sext_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, w1, sxtw #3]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i32 %a
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_zext(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, w1, uxtw]
+; CHECK-NEXT:    ret
+  %a.zext = zext i32 %a to i64
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a.zext
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_zext_shift(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_zext_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, w1, uxtw #3]
+; CHECK-NEXT:    ret
+  %a.zext = zext i32 %a to i64
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a.zext
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}



More information about the llvm-commits mailing list