[llvm] [NVPTX] Fix and refine prefetch.* intrinsics (PR #126899)
Abhilash Majumder via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 04:53:07 PST 2025
https://github.com/abhilash1910 updated https://github.com/llvm/llvm-project/pull/126899
>From 68fe47536d122e87c3b5bb5510387fc549422ea9 Mon Sep 17 00:00:00 2001
From: abmajumder <abmajumder at nvidia.com>
Date: Wed, 12 Feb 2025 17:43:09 +0530
Subject: [PATCH 1/2] fix and refine intrinsics
---
llvm/docs/NVPTXUsage.rst | 15 ++++-----
llvm/include/llvm/IR/IntrinsicsNVVM.td | 27 +++++++--------
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 21 ++++--------
llvm/test/CodeGen/NVPTX/prefetch.ll | 43 ++++++++++--------------
4 files changed, 44 insertions(+), 62 deletions(-)
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 1680b11433537..5168c6cea99d1 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -589,16 +589,13 @@ Syntax:
.. code-block:: llvm
- declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
- declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+ eclare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+ declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
+ declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+ declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
- declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
- declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
- declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
- declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
-
- declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
- declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
+ declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
+ declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 6af1f2a166773..19d1535e6215d 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -5001,20 +5001,19 @@ foreach dim = [1, 2, 3, 4, 5] in {
}
// Intrinsics for Prefetch and Prefetchu
-foreach level = ["L1", "L2"] in {
- foreach addr = ["global", "local", ""] in {
- foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
- defvar suffix = "" # !if(!eq(addr, ""), "", addr # "_") # level # "_" # evict;
- def int_nvvm_prefetch_ # suffix : Intrinsic<[],
- !cond(
- !eq(addr, "global") : [llvm_global_ptr_ty],
- !eq(addr, "local") : [llvm_local_ptr_ty],
- !eq(addr, "") : [llvm_ptr_ty]),
- [IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
- NoCapture<ArgIndex<0>>]>;
- }
- }
-}
+def int_nvvm_prefetch_L1 : Intrinsic<[], [llvm_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_L2 : Intrinsic<[], [llvm_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_global_L1 : Intrinsic<[], [llvm_global_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_global_L2 : Intrinsic<[], [llvm_global_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_local_L1 : Intrinsic<[], [llvm_local_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_local_L2 : Intrinsic<[], [llvm_local_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+
def int_nvvm_prefetchu_L1_evictnormal : Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 39dac65d67eb9..1b81072a19769 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -741,9 +741,6 @@ foreach dim = [1, 2, 3, 4, 5] in {
}
//Prefetch and Prefetchu
-class Join<string sep, list<string> lst> {
- string ret = !foldl("", lst, a, b, !if(!eq(a, ""), b, !if(!eq(b,""), a, !strconcat(a, sep, b))));
-}
class PREFETCH_INTRS<string InstName> :
NVPTXInst<(outs), (ins Int64Regs:$addr),
@@ -753,17 +750,13 @@ class PREFETCH_INTRS<string InstName> :
Requires<[hasPTX<80>, hasSM<90>]>;
-// Only global supports evictlast and evictnormal.
-// Other variants (local and default) only support evictnormal
-foreach level = ["L1", "L2"] in {
- foreach addr = ["global", "local", ""] in {
- foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
- defvar suffix = Join<"_", [addr, level, evict]>.ret;
- defvar inst_name = "prefetch." # !subst("_", ".", suffix);
- def PREFETCH_# suffix : PREFETCH_INTRS<inst_name>;
- }
- }
-}
+def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1">;
+def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2">;
+def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">;
+def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">;
+def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">;
+def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">;
+
def PREFETCHU_L1_EVICTNORMAL : PREFETCH_INTRS<"prefetchu.L1.evictnormal">;
diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll
index cf47000ffd9aa..70fd2a24fc586 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch.ll
@@ -4,16 +4,13 @@
target triple = "nvptx64-nvidia-cuda"
-declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
-declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
+declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
-declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
-declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
-declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
-declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
-
-declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
-declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
+declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
+declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
@@ -24,11 +21,11 @@ define void @prefetch_local(ptr addrspace(5) %local_ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_local_param_0];
-; CHECK-PTX64-NEXT: prefetch.local.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT: prefetch.local.L2.evictnormal [%rd1];
+; CHECK-PTX64-NEXT: prefetch.local.L1 [%rd1];
+; CHECK-PTX64-NEXT: prefetch.local.L2 [%rd1];
; CHECK-PTX64-NEXT: ret;
- tail call void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
- tail call void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+ tail call void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+ tail call void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
ret void
}
@@ -39,15 +36,11 @@ define void @prefetch_global(ptr addrspace(1) %global_ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_global_param_0];
-; CHECK-PTX64-NEXT: prefetch.global.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT: prefetch.global.L2.evictnormal [%rd1];
-; CHECK-PTX64-NEXT: prefetch.global.L1.evictlast [%rd1];
-; CHECK-PTX64-NEXT: prefetch.global.L2.evictlast [%rd1];
+; CHECK-PTX64-NEXT: prefetch.global.L1 [%rd1];
+; CHECK-PTX64-NEXT: prefetch.global.L2 [%rd1];
; CHECK-PTX64-NEXT: ret;
- tail call void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
- tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
- tail call void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
- tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
+ tail call void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+ tail call void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
ret void
}
@@ -59,11 +52,11 @@ define void @prefetch_(ptr %ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch__param_0];
-; CHECK-PTX64-NEXT: prefetch.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT: prefetch.L2.evictnormal [%rd1];
+; CHECK-PTX64-NEXT: prefetch.L1 [%rd1];
+; CHECK-PTX64-NEXT: prefetch.L2 [%rd1];
; CHECK-PTX64-NEXT: ret;
- tail call void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
- tail call void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
+ tail call void @llvm.nvvm.prefetch.L1(ptr %ptr)
+ tail call void @llvm.nvvm.prefetch.L2(ptr %ptr)
ret void
}
>From e5ae9253f858e194c577938e62033a88faf691f8 Mon Sep 17 00:00:00 2001
From: abmajumder <abmajumder at nvidia.com>
Date: Wed, 12 Feb 2025 18:22:38 +0530
Subject: [PATCH 2/2] refine intrinsics
---
llvm/docs/NVPTXUsage.rst | 5 ++++-
llvm/include/llvm/IR/IntrinsicsNVVM.td | 5 +++++
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 10 ++++++++++
llvm/test/CodeGen/NVPTX/prefetch.ll | 7 +++++++
4 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 5168c6cea99d1..2237c10208a63 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -589,7 +589,7 @@ Syntax:
.. code-block:: llvm
- eclare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+ declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
@@ -597,6 +597,9 @@ Syntax:
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
+ declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
+ declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
+
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
Overview:
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 19d1535e6215d..a4dda6d2bff55 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -5013,6 +5013,11 @@ def int_nvvm_prefetch_local_L1 : Intrinsic<[], [llvm_local_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_prefetch_local_L2 : Intrinsic<[], [llvm_local_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+
+def int_nvvm_prefetch_global_L2_evictnormal: Intrinsic<[], [llvm_global_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_global_L2_evictlast: Intrinsic<[], [llvm_global_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_prefetchu_L1_evictnormal : Intrinsic<[], [llvm_ptr_ty],
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 1b81072a19769..de99f636368b1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -757,6 +757,16 @@ def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">;
def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">;
def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">;
+def PREFETCH_GLOBAL_L2_EVICT_NORMAL : NVPTXInst<(outs), (ins Int64Regs:$addr),
+ "prefetch.global.L2::evict_normal" # " [$addr];",
+ [(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evictnormal") i64:$addr)]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+
+def PREFETCH_GLOBAL_L2_EVICT_LAST : NVPTXInst<(outs), (ins Int64Regs:$addr),
+ "prefetch.global.L2::evict_last" # " [$addr];",
+ [(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evictlast") i64:$addr)]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+
def PREFETCHU_L1_EVICTNORMAL : PREFETCH_INTRS<"prefetchu.L1.evictnormal">;
diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll
index 70fd2a24fc586..43cdb94de4175 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch.ll
@@ -12,6 +12,9 @@ declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
+declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
+declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
+
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
define void @prefetch_local(ptr addrspace(5) %local_ptr) {
@@ -38,9 +41,13 @@ define void @prefetch_global(ptr addrspace(1) %global_ptr) {
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_global_param_0];
; CHECK-PTX64-NEXT: prefetch.global.L1 [%rd1];
; CHECK-PTX64-NEXT: prefetch.global.L2 [%rd1];
+; CHECK-PTX64-NEXT: prefetch.global.L2::evict_normal [%rd1];
+; CHECK-PTX64-NEXT: prefetch.global.L2::evict_last [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
tail call void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
+ tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
+ tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
ret void
}
More information about the llvm-commits mailing list