[llvm] 3d4964f - [NVPTX] add new sm90-specific intrinsics.

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Thu May 25 11:58:42 PDT 2023


Author: Artem Belevich
Date: 2023-05-25T11:57:55-07:00
New Revision: 3d4964f4942ca74260228161778b570d4c908019

URL: https://github.com/llvm/llvm-project/commit/3d4964f4942ca74260228161778b570d4c908019
DIFF: https://github.com/llvm/llvm-project/commit/3d4964f4942ca74260228161778b570d4c908019.diff

LOG: [NVPTX] add new sm90-specific intrinsics.

Differential Revision: https://reviews.llvm.org/D151009

Added: 
    llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsNVVM.td
    llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
    llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 908fa3afb7ac4..5ffcd4bfad376 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1569,25 +1569,29 @@ def int_nvvm_reflect :
 
 // isspacep.{const, global, local, shared}
 def int_nvvm_isspacep_const
-  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], 
+  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
               "llvm.nvvm.isspacep.const">,
     ClangBuiltin<"__nvvm_isspacep_const">;
 def int_nvvm_isspacep_global
-  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], 
+  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
               "llvm.nvvm.isspacep.global">,
     ClangBuiltin<"__nvvm_isspacep_global">;
 def int_nvvm_isspacep_local
-  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], 
+  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
               "llvm.nvvm.isspacep.local">,
     ClangBuiltin<"__nvvm_isspacep_local">;
 def int_nvvm_isspacep_shared
-  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], 
+  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
               "llvm.nvvm.isspacep.shared">,
     ClangBuiltin<"__nvvm_isspacep_shared">;
+def int_nvvm_isspacep_shared_cluster
+  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+              "llvm.nvvm.isspacep.shared.cluster">;
 
 // Environment register read
 def int_nvvm_read_ptx_sreg_envreg0
@@ -4341,30 +4345,29 @@ def int_nvvm_swap_lo_hi_b64
 
 
 // Accessing special registers.
+
+class PTXReadSRegIntrinsicNB_r32
+  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>;
+class PTXReadSRegIntrinsic_r32<string name>
+  : PTXReadSRegIntrinsicNB_r32, ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
 multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
 // FIXME: Do we need the 128-bit integer type version?
 //    def _r64   : Intrinsic<[llvm_i128_ty],   [], [IntrNoMem, IntrSpeculatable]>;
 
 // FIXME: Enable this once v4i32 support is enabled in back-end.
 //    def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+  foreach suffix = ["_x", "_y", "_z", "_w"] in
+    def suffix : PTXReadSRegIntrinsic_r32<regname # suffix>;
+}
 
-  def _x     : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
-                 [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
-               ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
-  def _y     : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
-                 [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
-               ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
-  def _z     : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
-                 [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
-               ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
-  def _w     : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
-                 [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
-               ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
+// Same, but without automatic clang builtins. It is used for
+// registers that require a particular GPU or PTX version.
+multiclass PTXReadSRegIntrinsicNB_v4i32 {
+  foreach suffix = ["_x", "_y", "_z", "_w"] in
+    def suffix : PTXReadSRegIntrinsicNB_r32;
 }
 
-class PTXReadSRegIntrinsic_r32<string name>
-  : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
-    ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
 class PTXReadSRegIntrinsic_r64<string name>
   : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
     ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
@@ -4413,6 +4416,15 @@ def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
 
 def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
 
+// sm90+, PTX7.8+
+defm int_nvvm_read_ptx_sreg_clusterid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_nclusterid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_cluster_ctaid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32;
+
+def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
+def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
+
 //
 // SHUFFLE
 //
@@ -4661,4 +4673,25 @@ foreach transposed = [0, 1] in {
   }
 }
 
+def int_nvvm_mapa
+  : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
+              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+              "llvm.nvvm.mapa">;
+def int_nvvm_mapa_shared_cluster
+  : DefaultAttrsIntrinsic<[llvm_shared_i8ptr_ty], [llvm_shared_i8ptr_ty, llvm_i32_ty],
+              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+              "llvm.nvvm.mapa.shared.cluster">;
+def int_nvvm_getctarank
+  : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+              "llvm.nvvm.getctarank">;
+def int_nvvm_getctarank_shared_cluster
+  : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_i8ptr_ty],
+              [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+              "llvm.nvvm.getctarank.shared.cluster">;
+def int_nvvm_is_explicit_cluster
+  : DefaultAttrsIntrinsic<[llvm_i1_ty], [],
+              [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
+              "llvm.nvvm.is_explicit_cluster">;
+
 } // let TargetPrefix = "nvvm"

diff  --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index f5517b19c5a55..989556df66521 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2480,41 +2480,24 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
 
 // isspacep
 
-def ISSPACEP_CONST_32
-  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
-              "isspacep.const \t$d, $a;",
-              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
-    Requires<[hasPTX<31>]>;
-def ISSPACEP_CONST_64
-  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
-              "isspacep.const \t$d, $a;",
-              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
-    Requires<[hasPTX<31>]>;
-def ISSPACEP_GLOBAL_32
-  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
-              "isspacep.global \t$d, $a;",
-              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
-def ISSPACEP_GLOBAL_64
-  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
-              "isspacep.global \t$d, $a;",
-              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
-def ISSPACEP_LOCAL_32
-  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
-              "isspacep.local \t$d, $a;",
-              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
-def ISSPACEP_LOCAL_64
-  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
-              "isspacep.local \t$d, $a;",
-              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
-def ISSPACEP_SHARED_32
-  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
-              "isspacep.shared \t$d, $a;",
-              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
-def ISSPACEP_SHARED_64
-  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
-              "isspacep.shared \t$d, $a;",
-              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
-
+multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
+  def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+              "isspacep." # suffix # "\t$d, $a;",
+              [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
+    Requires<Preds>;
+  def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+              "isspacep." # suffix # "\t$d, $a;",
+              [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
+    Requires<Preds>;
+}
+
+defm isspace_const  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
+defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
+defm isspace_local  : ISSPACEP<"local", int_nvvm_isspacep_local>;
+defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
+defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
+                                       int_nvvm_isspacep_shared_cluster,
+                                       [hasPTX<78>, hasSM<90>]>;
 
 // Special register reads
 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
@@ -6213,35 +6196,51 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
 // Read Special Registers
 //-----------------------------------
 
-class PTX_READ_SREG_R64<string regname, Intrinsic intop>
+class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
   : NVPTXInst<(outs Int64Regs:$d), (ins),
               !strconcat("mov.u64 \t$d, %", regname, ";"),
-              [(set Int64Regs:$d, (intop))]>;
+              [(set Int64Regs:$d, (intop))]>,
+    Requires<Preds>;
 
-class PTX_READ_SREG_R32<string regname, Intrinsic intop>
+class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
   : NVPTXInst<(outs Int32Regs:$d), (ins),
               !strconcat("mov.u32 \t$d, %", regname, ";"),
-              [(set Int32Regs:$d, (intop))]>;
+              [(set Int32Regs:$d, (intop))]>,
+    Requires<Preds>;
+
+multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
+   foreach suffix = ["x", "y", "z", "w"] in {
+      defvar reg = regname # "." # suffix;
+      defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
+      def "_"#suffix :  PTX_READ_SREG_R32<reg, intr, Preds>;
+   }
+}
 
 // TODO Add read vector-version of special registers
 
-def INT_PTX_SREG_TID_X :
-    PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
-def INT_PTX_SREG_TID_Y :
-    PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
-def INT_PTX_SREG_TID_Z :
-    PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
-def INT_PTX_SREG_TID_W :
-    PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
-
-def INT_PTX_SREG_NTID_X :
-    PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
-def INT_PTX_SREG_NTID_Y :
-    PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
-def INT_PTX_SREG_NTID_Z :
-    PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
-def INT_PTX_SREG_NTID_W :
-    PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
+defm INT_PTX_SREG_TID   : PTX_READ_SREG_R32V4<"tid">;
+defm INT_PTX_SREG_NTID  : PTX_READ_SREG_R32V4<"ntid">;
+defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
+defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
+
+defm INT_PTX_SREG_CLUSTERID :
+       PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_NCLUSTERID :
+       PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_CLUSTER_CTAID :
+       PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_CLUSTER_NCTAID:
+       PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
+
+def  INT_PTX_SREG_CLUSTER_CTARANK :
+       PTX_READ_SREG_R32<"cluster_ctarank",
+                         int_nvvm_read_ptx_sreg_cluster_ctarank,
+                         [hasSM<90>, hasPTX<78>]>;
+def  INT_PTX_SREG_CLUSTER_NCTARANK:
+       PTX_READ_SREG_R32<"cluster_nctarank",
+                         int_nvvm_read_ptx_sreg_cluster_nctarank,
+                         [hasSM<90>, hasPTX<78>]>;
+
 
 def INT_PTX_SREG_LANEID :
     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
@@ -6249,25 +6248,6 @@ def INT_PTX_SREG_WARPID :
     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
 def INT_PTX_SREG_NWARPID :
     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
-
-def INT_PTX_SREG_CTAID_X :
-    PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
-def INT_PTX_SREG_CTAID_Y :
-    PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
-def INT_PTX_SREG_CTAID_Z :
-    PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
-def INT_PTX_SREG_CTAID_W :
-    PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
-
-def INT_PTX_SREG_NCTAID_X :
-    PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
-def INT_PTX_SREG_NCTAID_Y :
-    PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
-def INT_PTX_SREG_NCTAID_Z :
-    PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
-def INT_PTX_SREG_NCTAID_W :
-    PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
-
 def INT_PTX_SREG_SMID :
     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
 def INT_PTX_SREG_NSMID :
@@ -6704,3 +6684,45 @@ class MMA_PAT<WMMA_INSTR wi>
 // Build intrinsic->instruction patterns for all MMA instructions.
 foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
   def : MMA_PAT<mma>;
+
+multiclass MAPA<string suffix, Intrinsic Intr> {
+  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
+              "mapa" # suffix # ".u32\t$d, $a, $b;",
+              [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
+    Requires<[hasSM<90>, hasPTX<78>]>;
+  def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
+              "mapa" # suffix # ".u32\t$d, $a, $b;",
+              [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
+    Requires<[hasSM<90>, hasPTX<78>]>;
+  def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
+              "mapa" # suffix # ".u64\t$d, $a, $b;",
+              [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
+    Requires<[hasSM<90>, hasPTX<78>]>;
+  def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
+              "mapa" # suffix # ".u64\t$d, $a, $b;",
+              [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
+    Requires<[hasSM<90>, hasPTX<78>]>;
+}
+
+defm mapa  : MAPA<"", int_nvvm_mapa>;
+defm mapa_shared_cluster  : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
+
+
+multiclass GETCTARANK<string suffix, Intrinsic Intr> {
+  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+              "getctarank" # suffix # ".u32\t$d, $a;",
+              [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
+    Requires<[hasSM<90>, hasPTX<78>]>;
+  def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "getctarank" # suffix # ".u64\t$d, $a;",
+              [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
+    Requires<[hasSM<90>, hasPTX<78>]>;
+}
+
+defm getctarank  : GETCTARANK<"", int_nvvm_getctarank>;
+defm getctarank_shared_cluster  : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
+
+def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
+              "mov.pred\t$d, %is_explicit_cluster;",
+              [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
+    Requires<[hasSM<90>, hasPTX<78>]>;

diff  --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 79d8c7122766a..3b74dbfeb89f5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -291,6 +291,7 @@ NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
     case Intrinsic::nvvm_isspacep_local:
       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
     case Intrinsic::nvvm_isspacep_shared:
+    case Intrinsic::nvvm_isspacep_shared_cluster:
       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
     default:
       break;

diff  --git a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
new file mode 100644
index 0000000000000..83b4208e2580a
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx78| FileCheck --check-prefixes=CHECK,CHECK64 %s
+; RUN: %if ptxas-11.8 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx78| %ptxas-verify -arch=sm_90 %}
+
+; CHECK-LABEL: test_isspacep_shared_cluster(
+define i1 @test_isspacep_shared_cluster(ptr %p) {
+; CHECK: isspacep.shared::cluster
+  %a = tail call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p)
+; CHECK: ret
+  ret i1 %a
+}
+
+; CHECK-LABEL: test_mapa(
+define ptr @test_mapa(ptr %p, i32 %r) {
+; CHECK64: mapa.u64
+  %a = call ptr @llvm.nvvm.mapa(ptr %p, i32 %r)
+  ret ptr %a
+}
+
+; CHECK-LABEL: test_mapa_shared_cluster(
+define ptr addrspace(3) @test_mapa_shared_cluster(ptr addrspace(3) %p, i32 %r) {
+; CHECK: mapa.shared::cluster.u64
+  %a = call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %p, i32 %r)
+  ret ptr addrspace(3) %a
+}
+
+; CHECK-LABEL: test_getctarank(
+define i32 @test_getctarank(ptr %p) {
+; CHECK: getctarank.u64
+  %a = call i32 @llvm.nvvm.getctarank(ptr %p)
+  ret i32 %a
+}
+
+; CHECK-LABEL: test_getctarank_shared_cluster(
+define i32 @test_getctarank_shared_cluster(ptr addrspace(3) %p) {
+; CHECK64: getctarank.shared::cluster.u64
+; CHECK32: getctarank.shared::cluster.u32
+  %a = call i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) %p)
+  ret i32 %a
+}
+
+; CHECK-LABEL: test_clusterid_x(
+define i32 @test_clusterid_x() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.x;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
+        ret i32 %x
+}
+; CHECK-LABEL: test_clusterid_y(
+define i32 @test_clusterid_y() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.y;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
+        ret i32 %x
+}
+; CHECK-LABEL: test_clusterid_z(
+define i32 @test_clusterid_z() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.z;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
+        ret i32 %x
+}
+; CHECK-LABEL: test_clusterid_w(
+define i32 @test_clusterid_w() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.w;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.w()
+        ret i32 %x
+}
+
+; CHECK-LABEL: test_nclusterid_x(
+define i32 @test_nclusterid_x() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.x;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
+        ret i32 %x
+}
+; CHECK-LABEL: test_nclusterid_y(
+define i32 @test_nclusterid_y() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.y;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
+        ret i32 %x
+}
+; CHECK-LABEL: test_nclusterid_z(
+define i32 @test_nclusterid_z() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.z;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
+        ret i32 %x
+}
+; CHECK-LABEL: test_nclusterid_w(
+define i32 @test_nclusterid_w() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.w;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w()
+        ret i32 %x
+}
+
+; CHECK-LABEL: test_cluster_ctarank(
+define i32 @test_cluster_ctarank() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %cluster_ctarank;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
+        ret i32 %x
+}
+
+; CHECK-LABEL: test_cluster_nctarank(
+define i32 @test_cluster_nctarank() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %cluster_nctarank;
+; CHECK: ret;
+        %x = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
+        ret i32 %x
+}
+
+; CHECK-LABEL: test_is_explicit_cluster(
+define i1 @test_is_explicit_cluster() {
+; CHECK: mov.pred %p{{[0-9]+}}, %is_explicit_cluster;
+; CHECK: ret;
+        %x = call i1 @llvm.nvvm.is_explicit_cluster()
+        ret i1 %x
+}
+
+
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p);
+declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r);
+declare ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %p, i32 %r);
+declare i32 @llvm.nvvm.getctarank(ptr %p);
+declare i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) %p);
+declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
+declare i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
+declare i1 @llvm.nvvm.is_explicit_cluster()


        


More information about the llvm-commits mailing list