[llvm] 55f3df8 - [NVPTX] Fix and refine prefetch.* intrinsics (#126899)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 08:54:04 PST 2025


Author: Abhilash Majumder
Date: 2025-02-13T17:54:01+01:00
New Revision: 55f3df875d3a266c566f593357e2290879f49321

URL: https://github.com/llvm/llvm-project/commit/55f3df875d3a266c566f593357e2290879f49321
DIFF: https://github.com/llvm/llvm-project/commit/55f3df875d3a266c566f593357e2290879f49321.diff

LOG: [NVPTX] Fix and refine prefetch.* intrinsics (#126899)

This is a follow-up PR to #125887 that fixes the intrinsic failures.

---------

Co-authored-by: abmajumder <abmajumder at nvidia.com>

Added: 
    

Modified: 
    llvm/docs/NVPTXUsage.rst
    llvm/include/llvm/IR/IntrinsicsNVVM.td
    llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
    llvm/test/CodeGen/NVPTX/prefetch.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 7eacc58549c7d..8550af456e961 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -598,18 +598,18 @@ Syntax:
 
 .. code-block:: llvm
 
-  declare void  @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
-  declare void  @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+  declare void  @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+  declare void  @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
+  declare void  @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+  declare void  @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
   
-  declare void  @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
-  declare void  @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
-  declare void  @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
-  declare void  @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
+  declare void  @llvm.nvvm.prefetch.L1(ptr %ptr)
+  declare void  @llvm.nvvm.prefetch.L2(ptr %ptr)
   
-  declare void  @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
-  declare void  @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
-  
-  declare void  @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
+  declare void  @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
+  declare void  @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
+
+  declare void  @llvm.nvvm.prefetchu.L1(ptr %ptr)
 
 Overview:
 """""""""

diff  --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 6af1f2a166773..1a6aa17b531c6 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -5001,22 +5001,26 @@ foreach dim = [1, 2, 3, 4, 5] in {
 }
 
 // Intrinsics for Prefetch and Prefetchu
-foreach level = ["L1", "L2"] in {
-   foreach addr = ["global", "local", ""] in {
-     foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
-      defvar suffix = "" # !if(!eq(addr, ""), "", addr # "_") # level # "_" # evict;             
-      def int_nvvm_prefetch_ # suffix : Intrinsic<[], 
-                                        !cond(
-                                        !eq(addr, "global") : [llvm_global_ptr_ty],
-                                        !eq(addr, "local") : [llvm_local_ptr_ty],
-                                        !eq(addr, "") : [llvm_ptr_ty]),
-                                        [IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
-                                        NoCapture<ArgIndex<0>>]>;   
-    }
-  }
-}
+def int_nvvm_prefetch_L1 : Intrinsic<[], [llvm_ptr_ty],
+  [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_L2 : Intrinsic<[], [llvm_ptr_ty],
+  [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_global_L1 :  Intrinsic<[], [llvm_global_ptr_ty],
+  [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_global_L2 :  Intrinsic<[], [llvm_global_ptr_ty],
+  [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_local_L1 :  Intrinsic<[], [llvm_local_ptr_ty],
+  [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_local_L2 :  Intrinsic<[], [llvm_local_ptr_ty],
+  [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+  
+def int_nvvm_prefetch_global_L2_evict_normal: Intrinsic<[], [llvm_global_ptr_ty],
+  [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_global_L2_evict_last: Intrinsic<[], [llvm_global_ptr_ty],
+  [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+
 
-def int_nvvm_prefetchu_L1_evictnormal : Intrinsic<[], [llvm_ptr_ty],
+def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty],
   [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
 
 

diff  --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 39dac65d67eb9..6a99a4b3b4f69 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -741,9 +741,6 @@ foreach dim = [1, 2, 3, 4, 5] in {
 }
 
 //Prefetch and Prefetchu 
-class Join<string sep, list<string> lst> {
-  string ret = !foldl("", lst, a, b, !if(!eq(a, ""), b, !if(!eq(b,""), a, !strconcat(a, sep, b))));
-}
 
 class PREFETCH_INTRS<string InstName> :
           NVPTXInst<(outs), (ins Int64Regs:$addr),
@@ -753,19 +750,25 @@ class PREFETCH_INTRS<string InstName> :
           Requires<[hasPTX<80>, hasSM<90>]>;
    
 
-// Only global supports evictlast and evictnormal.
-// Other variants (local and default) only support evictnormal
-foreach level = ["L1", "L2"] in {
-  foreach addr = ["global", "local", ""] in {
-    foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
-      defvar suffix = Join<"_", [addr, level, evict]>.ret;
-      defvar inst_name = "prefetch." # !subst("_", ".", suffix);
-      def PREFETCH_# suffix : PREFETCH_INTRS<inst_name>;
-    }
-  }
-}
+def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1">;
+def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2">;
+def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">;
+def PREFETCH_LOCAL_L1  : PREFETCH_INTRS<"prefetch.local.L1">;
+def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">;
+def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">;
+
+def PREFETCH_GLOBAL_L2_EVICT_NORMAL : NVPTXInst<(outs), (ins Int64Regs:$addr),
+                                      "prefetch.global.L2::evict_normal" # " [$addr];",
+                                      [(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evict_normal") i64:$addr)]>,
+                                      Requires<[hasPTX<80>, hasSM<90>]>;
+
+def PREFETCH_GLOBAL_L2_EVICT_LAST   : NVPTXInst<(outs), (ins Int64Regs:$addr),
+                                      "prefetch.global.L2::evict_last" # " [$addr];",
+                                      [(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evict_last") i64:$addr)]>,
+                                      Requires<[hasPTX<80>, hasSM<90>]>;
+
 
-def PREFETCHU_L1_EVICTNORMAL : PREFETCH_INTRS<"prefetchu.L1.evictnormal">;
+def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">;
 
 //-----------------------------------
 // MBarrier Functions

diff  --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll
index cf47000ffd9aa..68512bfac7a29 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch.ll
@@ -4,18 +4,18 @@
 
 target triple = "nvptx64-nvidia-cuda"
 
-declare void  @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
-declare void  @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+declare void  @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+declare void  @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
+declare void  @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+declare void  @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
 
-declare void  @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
-declare void  @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
-declare void  @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
-declare void  @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
+declare void  @llvm.nvvm.prefetch.L1(ptr %ptr)
+declare void  @llvm.nvvm.prefetch.L2(ptr %ptr)
 
-declare void  @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
-declare void  @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
+declare void  @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
+declare void  @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
 
-declare void  @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
+declare void  @llvm.nvvm.prefetchu.L1(ptr %ptr)
 
 define void @prefetch_local(ptr addrspace(5) %local_ptr) {
 ; CHECK-PTX64-LABEL: prefetch_local(
@@ -24,11 +24,11 @@ define void @prefetch_local(ptr addrspace(5) %local_ptr) {
 ; CHECK-PTX64-EMPTY:
 ; CHECK-PTX64-NEXT:  // %bb.0:
 ; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch_local_param_0];
-; CHECK-PTX64-NEXT:    prefetch.local.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT:    prefetch.local.L2.evictnormal [%rd1];
+; CHECK-PTX64-NEXT:    prefetch.local.L1 [%rd1];
+; CHECK-PTX64-NEXT:    prefetch.local.L2 [%rd1];
 ; CHECK-PTX64-NEXT:    ret;
-  tail call void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
-  tail call void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+  tail call void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+  tail call void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
   ret void
 }
 
@@ -39,15 +39,15 @@ define void @prefetch_global(ptr addrspace(1) %global_ptr) {
 ; CHECK-PTX64-EMPTY:
 ; CHECK-PTX64-NEXT:  // %bb.0:
 ; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch_global_param_0];
-; CHECK-PTX64-NEXT:    prefetch.global.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT:    prefetch.global.L2.evictnormal [%rd1];
-; CHECK-PTX64-NEXT:    prefetch.global.L1.evictlast [%rd1];
-; CHECK-PTX64-NEXT:    prefetch.global.L2.evictlast [%rd1];
+; CHECK-PTX64-NEXT:    prefetch.global.L1 [%rd1];
+; CHECK-PTX64-NEXT:    prefetch.global.L2 [%rd1];
+; CHECK-PTX64-NEXT:    prefetch.global.L2::evict_normal [%rd1];
+; CHECK-PTX64-NEXT:    prefetch.global.L2::evict_last [%rd1];
 ; CHECK-PTX64-NEXT:    ret;
-  tail call void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
-  tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
-  tail call void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
-  tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
+  tail call void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+  tail call void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
+  tail call void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
+  tail call void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
   ret void
 }
 
@@ -59,11 +59,11 @@ define void @prefetch_(ptr %ptr) {
 ; CHECK-PTX64-EMPTY:
 ; CHECK-PTX64-NEXT:  // %bb.0:
 ; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch__param_0];
-; CHECK-PTX64-NEXT:    prefetch.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT:    prefetch.L2.evictnormal [%rd1];
+; CHECK-PTX64-NEXT:    prefetch.L1 [%rd1];
+; CHECK-PTX64-NEXT:    prefetch.L2 [%rd1];
 ; CHECK-PTX64-NEXT:    ret;
-  tail call void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
-  tail call void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
+  tail call void @llvm.nvvm.prefetch.L1(ptr %ptr)
+  tail call void @llvm.nvvm.prefetch.L2(ptr %ptr)
   ret void
 }
 
@@ -74,8 +74,8 @@ define void @prefetchu_l1(ptr %ptr) {
 ; CHECK-PTX64-EMPTY:
 ; CHECK-PTX64-NEXT:  // %bb.0:
 ; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetchu_l1_param_0];
-; CHECK-PTX64-NEXT:    prefetchu.L1.evictnormal [%rd1];
+; CHECK-PTX64-NEXT:    prefetchu.L1 [%rd1];
 ; CHECK-PTX64-NEXT:    ret;
-  tail call void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
+  tail call void @llvm.nvvm.prefetchu.L1(ptr %ptr)
   ret void
 }
\ No newline at end of file


        


More information about the llvm-commits mailing list