[llvm] 3d4964f - [NVPTX] add new sm90-specific intrinsics.
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Thu May 25 11:58:42 PDT 2023
Author: Artem Belevich
Date: 2023-05-25T11:57:55-07:00
New Revision: 3d4964f4942ca74260228161778b570d4c908019
URL: https://github.com/llvm/llvm-project/commit/3d4964f4942ca74260228161778b570d4c908019
DIFF: https://github.com/llvm/llvm-project/commit/3d4964f4942ca74260228161778b570d4c908019.diff
LOG: [NVPTX] add new sm90-specific intrinsics.
Differential Revision: https://reviews.llvm.org/D151009
Added:
llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
Modified:
llvm/include/llvm/IR/IntrinsicsNVVM.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 908fa3afb7ac4..5ffcd4bfad376 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1569,25 +1569,29 @@ def int_nvvm_reflect :
// isspacep.{const, global, local, shared}
def int_nvvm_isspacep_const
- : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
"llvm.nvvm.isspacep.const">,
ClangBuiltin<"__nvvm_isspacep_const">;
def int_nvvm_isspacep_global
- : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
"llvm.nvvm.isspacep.global">,
ClangBuiltin<"__nvvm_isspacep_global">;
def int_nvvm_isspacep_local
- : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
"llvm.nvvm.isspacep.local">,
ClangBuiltin<"__nvvm_isspacep_local">;
def int_nvvm_isspacep_shared
- : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
"llvm.nvvm.isspacep.shared">,
ClangBuiltin<"__nvvm_isspacep_shared">;
+def int_nvvm_isspacep_shared_cluster
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.isspacep.shared.cluster">;
// Environment register read
def int_nvvm_read_ptx_sreg_envreg0
@@ -4341,30 +4345,29 @@ def int_nvvm_swap_lo_hi_b64
// Accessing special registers.
+
+// Special-register read returning i32 and taking no arguments. Carries no
+// ClangBuiltin, so no clang builtin is generated for it automatically.
+class PTXReadSRegIntrinsicNB_r32
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>;
+// Same intrinsic shape as above, additionally bound to the clang builtin
+// "__nvvm_read_ptx_sreg_<name>".
+class PTXReadSRegIntrinsic_r32<string name>
+ : PTXReadSRegIntrinsicNB_r32, ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// FIXME: Do we need the 128-bit integer type version?
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
// FIXME: Enable this once v4i32 support is enabled in back-end.
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+ foreach suffix = ["_x", "_y", "_z", "_w"] in
+ def suffix : PTXReadSRegIntrinsic_r32<regname # suffix>;
+}
- def _x : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
- [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
- def _y : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
- [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
- def _z : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
- [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
- def _w : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
- [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
+// Same, but without automatic clang builtins. It will be used for
+// registers that require particular GPU or PTX version.
+multiclass PTXReadSRegIntrinsicNB_v4i32 {
+ foreach suffix = ["_x", "_y", "_z", "_w"] in
+ def suffix : PTXReadSRegIntrinsicNB_r32;
}
-class PTXReadSRegIntrinsic_r32<string name>
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadSRegIntrinsic_r64<string name>
: DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
@@ -4413,6 +4416,15 @@ def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
+// sm90+, PTX7.8+
+defm int_nvvm_read_ptx_sreg_clusterid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_nclusterid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_cluster_ctaid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32;
+
+def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
+def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
+
//
// SHUFFLE
//
@@ -4661,4 +4673,25 @@ foreach transposed = [0, 1] in {
}
}
+def int_nvvm_mapa
+ : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.mapa">;
+def int_nvvm_mapa_shared_cluster
+ : DefaultAttrsIntrinsic<[llvm_shared_i8ptr_ty], [llvm_shared_i8ptr_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.mapa.shared.cluster">;
+def int_nvvm_getctarank
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.getctarank">;
+def int_nvvm_getctarank_shared_cluster
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_i8ptr_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.getctarank.shared.cluster">;
+def int_nvvm_is_explicit_cluster
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [],
+ [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
+ "llvm.nvvm.is_explicit_cluster">;
+
} // let TargetPrefix = "nvvm"
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index f5517b19c5a55..989556df66521 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2480,41 +2480,24 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
// isspacep
-def ISSPACEP_CONST_32
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
- "isspacep.const \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
- Requires<[hasPTX<31>]>;
-def ISSPACEP_CONST_64
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
- "isspacep.const \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
- Requires<[hasPTX<31>]>;
-def ISSPACEP_GLOBAL_32
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
- "isspacep.global \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
-def ISSPACEP_GLOBAL_64
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
- "isspacep.global \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
-def ISSPACEP_LOCAL_32
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
- "isspacep.local \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
-def ISSPACEP_LOCAL_64
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
- "isspacep.local \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
-def ISSPACEP_SHARED_32
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
- "isspacep.shared \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
-def ISSPACEP_SHARED_64
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
- "isspacep.shared \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
-
+// Defines the "isspacep.<suffix>" instruction in two forms: _32 takes the
+// address in a 32-bit register, _64 in a 64-bit register. Both produce a
+// predicate in Int1Regs, are selected from a call to Intr, and are gated on
+// Preds (empty by default, i.e. no extra requirements).
+multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
+ def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep." # suffix # "\t$d, $a;",
+ [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
+ Requires<Preds>;
+ def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep." # suffix # "\t$d, $a;",
+ [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
+ Requires<Preds>;
+}
+
+defm isspace_const : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
+defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
+defm isspace_local : ISSPACEP<"local", int_nvvm_isspacep_local>;
+defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
+defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
+ int_nvvm_isspacep_shared_cluster,
+ [hasPTX<78>, hasSM<90>]>;
// Special register reads
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
@@ -6213,35 +6196,51 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
// Read Special Registers
//-----------------------------------
-class PTX_READ_SREG_R64<string regname, Intrinsic intop>
+class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
: NVPTXInst<(outs Int64Regs:$d), (ins),
!strconcat("mov.u64 \t$d, %", regname, ";"),
- [(set Int64Regs:$d, (intop))]>;
+ [(set Int64Regs:$d, (intop))]>,
+ Requires<Preds>;
-class PTX_READ_SREG_R32<string regname, Intrinsic intop>
+class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
: NVPTXInst<(outs Int32Regs:$d), (ins),
!strconcat("mov.u32 \t$d, %", regname, ";"),
- [(set Int32Regs:$d, (intop))]>;
+ [(set Int32Regs:$d, (intop))]>,
+ Requires<Preds>;
+
+// Instantiates PTX_READ_SREG_R32 once per component (x, y, z, w) of the
+// special register `regname`, producing records suffixed _x/_y/_z/_w.
+// Preds is forwarded unchanged to every component.
+multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
+ foreach suffix = ["x", "y", "z", "w"] in {
+ // Register operand as printed in the asm string, e.g. "tid.x".
+ defvar reg = regname # "." # suffix;
+ // The matching intrinsic is looked up by its constructed record name, so
+ // int_nvvm_read_ptx_sreg_<regname>_<suffix> must already be defined.
+ defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
+ def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>;
+ }
+}
// TODO Add read vector-version of special registers
-def INT_PTX_SREG_TID_X :
- PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
-def INT_PTX_SREG_TID_Y :
- PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
-def INT_PTX_SREG_TID_Z :
- PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
-def INT_PTX_SREG_TID_W :
- PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
-
-def INT_PTX_SREG_NTID_X :
- PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
-def INT_PTX_SREG_NTID_Y :
- PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
-def INT_PTX_SREG_NTID_Z :
- PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
-def INT_PTX_SREG_NTID_W :
- PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
+defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">;
+defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">;
+defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
+defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
+
+defm INT_PTX_SREG_CLUSTERID :
+ PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_NCLUSTERID :
+ PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_CLUSTER_CTAID :
+ PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_CLUSTER_NCTAID:
+ PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
+
+def INT_PTX_SREG_CLUSTER_CTARANK :
+ PTX_READ_SREG_R32<"cluster_ctarank",
+ int_nvvm_read_ptx_sreg_cluster_ctarank,
+ [hasSM<90>, hasPTX<78>]>;
+def INT_PTX_SREG_CLUSTER_NCTARANK:
+ PTX_READ_SREG_R32<"cluster_nctarank",
+ int_nvvm_read_ptx_sreg_cluster_nctarank,
+ [hasSM<90>, hasPTX<78>]>;
+
def INT_PTX_SREG_LANEID :
PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
@@ -6249,25 +6248,6 @@ def INT_PTX_SREG_WARPID :
PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID :
PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
-
-def INT_PTX_SREG_CTAID_X :
- PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
-def INT_PTX_SREG_CTAID_Y :
- PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
-def INT_PTX_SREG_CTAID_Z :
- PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
-def INT_PTX_SREG_CTAID_W :
- PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
-
-def INT_PTX_SREG_NCTAID_X :
- PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
-def INT_PTX_SREG_NCTAID_Y :
- PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
-def INT_PTX_SREG_NCTAID_Z :
- PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
-def INT_PTX_SREG_NCTAID_W :
- PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
-
def INT_PTX_SREG_SMID :
PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID :
@@ -6704,3 +6684,45 @@ class MMA_PAT<WMMA_INSTR wi>
// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
def : MMA_PAT<mma>;
+
+// Defines the "mapa<suffix>" instruction selected from Intr. Four forms are
+// produced: the address operand $a and the result $d are either 32-bit
+// (_32, _32i; printed with ".u32") or 64-bit (_64, _64i; printed with ".u64"),
+// and the second operand $b is either a 32-bit register or, in the "i"
+// variants, a 32-bit immediate. All forms require sm_90 and PTX 7.8.
+multiclass MAPA<string suffix, Intrinsic Intr> {
+ def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
+ "mapa" # suffix # ".u32\t$d, $a, $b;",
+ [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+ def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
+ "mapa" # suffix # ".u32\t$d, $a, $b;",
+ [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+ def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
+ "mapa" # suffix # ".u64\t$d, $a, $b;",
+ [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+ def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
+ "mapa" # suffix # ".u64\t$d, $a, $b;",
+ [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+}
+
+defm mapa : MAPA<"", int_nvvm_mapa>;
+defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
+
+
+// Defines the "getctarank<suffix>" instruction selected from Intr. The result
+// is always a 32-bit register; only the address operand width differs:
+// _32 takes a 32-bit address (".u32"), _64 a 64-bit address (".u64").
+// Both forms require sm_90 and PTX 7.8.
+multiclass GETCTARANK<string suffix, Intrinsic Intr> {
+ def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+ "getctarank" # suffix # ".u32\t$d, $a;",
+ [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+ def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "getctarank" # suffix # ".u64\t$d, $a;",
+ [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+}
+
+defm getctarank : GETCTARANK<"", int_nvvm_getctarank>;
+defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
+
+def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
+ "mov.pred\t$d, %is_explicit_cluster;",
+ [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 79d8c7122766a..3b74dbfeb89f5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -291,6 +291,7 @@ NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
case Intrinsic::nvvm_isspacep_local:
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
case Intrinsic::nvvm_isspacep_shared:
+ case Intrinsic::nvvm_isspacep_shared_cluster:
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
default:
break;
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
new file mode 100644
index 0000000000000..83b4208e2580a
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx78| FileCheck --check-prefixes=CHECK %s
+; RUN: %if ptxas-11.8 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx78| %ptxas-verify -arch=sm_90 %}
+
+; CHECK-LABEL: test_isspacep_shared_cluster(
+define i1 @test_isspacep_shared_cluster(ptr %p) {
+; CHECK: isspacep.shared::cluster
+ %a = tail call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p)
+; CHECK: ret
+ ret i1 %a
+}
+
+; CHECK-LABEL: test_mapa(
+define ptr @test_mapa(ptr %p, i32 %r) {
+; CHECK: mapa.u64
+ %a = call ptr @llvm.nvvm.mapa(ptr %p, i32 %r)
+ ret ptr %a
+}
+
+; CHECK-LABEL: test_mapa_shared_cluster(
+define ptr addrspace(3) @test_mapa_shared_cluster(ptr addrspace(3) %p, i32 %r) {
+; CHECK: mapa.shared::cluster.u64
+ %a = call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %p, i32 %r)
+ ret ptr addrspace(3) %a
+}
+
+; CHECK-LABEL: test_getctarank(
+define i32 @test_getctarank(ptr %p) {
+; CHECK: getctarank.u64
+ %a = call i32 @llvm.nvvm.getctarank(ptr %p)
+ ret i32 %a
+}
+
+; CHECK-LABEL: test_getctarank_shared_cluster(
+define i32 @test_getctarank_shared_cluster(ptr addrspace(3) %p) {
+; This file is only run for nvptx64, so only the 64-bit address form is checked.
+; CHECK: getctarank.shared::cluster.u64
+ %a = call i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) %p)
+ ret i32 %a
+}
+
+; CHECK-LABEL: test_clusterid_x(
+define i32 @test_clusterid_x() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.x;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
+ ret i32 %x
+}
+; CHECK-LABEL: test_clusterid_y(
+define i32 @test_clusterid_y() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.y;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
+ ret i32 %x
+}
+; CHECK-LABEL: test_clusterid_z(
+define i32 @test_clusterid_z() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.z;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
+ ret i32 %x
+}
+; CHECK-LABEL: test_clusterid_w(
+define i32 @test_clusterid_w() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.w;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.w()
+ ret i32 %x
+}
+
+; CHECK-LABEL: test_nclusterid_x(
+define i32 @test_nclusterid_x() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.x;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
+ ret i32 %x
+}
+; CHECK-LABEL: test_nclusterid_y(
+define i32 @test_nclusterid_y() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.y;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
+ ret i32 %x
+}
+; CHECK-LABEL: test_nclusterid_z(
+define i32 @test_nclusterid_z() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.z;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
+ ret i32 %x
+}
+; CHECK-LABEL: test_nclusterid_w(
+define i32 @test_nclusterid_w() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.w;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w()
+ ret i32 %x
+}
+
+; CHECK-LABEL: test_cluster_ctarank(
+define i32 @test_cluster_ctarank() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %cluster_ctarank;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
+ ret i32 %x
+}
+
+; CHECK-LABEL: test_cluster_nctarank(
+define i32 @test_cluster_nctarank() {
+; CHECK: mov.u32 %r{{[0-9]+}}, %cluster_nctarank;
+; CHECK: ret;
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
+ ret i32 %x
+}
+
+; CHECK-LABEL: test_is_explicit_cluster(
+define i1 @test_is_explicit_cluster() {
+; CHECK: mov.pred %p{{[0-9]+}}, %is_explicit_cluster;
+; CHECK: ret;
+ %x = call i1 @llvm.nvvm.is_explicit_cluster()
+ ret i1 %x
+}
+
+
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p);
+declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r);
+declare ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %p, i32 %r);
+declare i32 @llvm.nvvm.getctarank(ptr %p);
+declare i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) %p);
+declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
+declare i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
+declare i1 @llvm.nvvm.is_explicit_cluster()
More information about the llvm-commits
mailing list