[clang] 0a0bae1 - [CUDA] plumb through new sm_90-specific builtins.

Artem Belevich via cfe-commits cfe-commits at lists.llvm.org
Thu May 25 11:58:44 PDT 2023


Author: Artem Belevich
Date: 2023-05-25T11:57:56-07:00
New Revision: 0a0bae1e9f94ec86ac17b0b4eb817741689f3739

URL: https://github.com/llvm/llvm-project/commit/0a0bae1e9f94ec86ac17b0b4eb817741689f3739
DIFF: https://github.com/llvm/llvm-project/commit/0a0bae1e9f94ec86ac17b0b4eb817741689f3739.diff

LOG: [CUDA] plumb through new sm_90-specific builtins.

Differential Revision: https://reviews.llvm.org/D151168

Added: 
    clang/test/CodeGenCUDA/builtins-sm90.cu

Modified: 
    clang/include/clang/Basic/BuiltinsNVPTX.def
    clang/lib/CodeGen/CGBuiltin.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index f678f935c5d65..39a34eb296115 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -99,6 +99,31 @@ BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
 BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
 BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
 
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78))
+
 BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
 BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
 BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
@@ -865,6 +890,7 @@ BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
 BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
 BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
 BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
+TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78))
 
 // Builtins to support WMMA instructions on sm_70
 TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
@@ -988,6 +1014,11 @@ TARGET_BUILTIN(__nvvm_abs_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
 TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70))
 TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
 
+TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #pragma pop_macro("AND")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 301248d98d20d..9b151befee9b7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18885,6 +18885,83 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
                        Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
                        16);
+  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
+  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
+  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
+  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
+  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
+  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
+  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
+  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
+  case NVPTX::BI__nvvm_is_explicit_cluster:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
+  case NVPTX::BI__nvvm_isspacep_shared_cluster:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
+        EmitScalarExpr(E->getArg(0)));
+  case NVPTX::BI__nvvm_mapa:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_mapa),
+        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
+  case NVPTX::BI__nvvm_mapa_shared_cluster:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
+        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
+  case NVPTX::BI__nvvm_getctarank:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
+        EmitScalarExpr(E->getArg(0)));
+  case NVPTX::BI__nvvm_getctarank_shared_cluster:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
+        EmitScalarExpr(E->getArg(0)));
   default:
     return nullptr;
   }

diff  --git a/clang/test/CodeGenCUDA/builtins-sm90.cu b/clang/test/CodeGenCUDA/builtins-sm90.cu
new file mode 100644
index 0000000000000..4f67c0e93cfe5
--- /dev/null
+++ b/clang/test/CodeGenCUDA/builtins-sm90.cu
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx78" "-target-cpu" "sm_90" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+// CHECK: define{{.*}} void @_Z6kernelPlPvj(
+__attribute__((global)) void kernel(long *out, void *ptr, unsigned u) {
+  int i = 0;
+  // CHECK: call i1 @llvm.nvvm.isspacep.shared.cluster
+  out[i++] = __nvvm_isspacep_shared_cluster(ptr);
+
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
+  out[i++] = __nvvm_read_ptx_sreg_clusterid_x();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
+  out[i++] = __nvvm_read_ptx_sreg_clusterid_y();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
+  out[i++] = __nvvm_read_ptx_sreg_clusterid_z();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.w()
+  out[i++] = __nvvm_read_ptx_sreg_clusterid_w();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
+  out[i++] = __nvvm_read_ptx_sreg_nclusterid_x();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
+  out[i++] = __nvvm_read_ptx_sreg_nclusterid_y();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
+  out[i++] = __nvvm_read_ptx_sreg_nclusterid_z();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w()
+  out[i++] = __nvvm_read_ptx_sreg_nclusterid_w();
+
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_x();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_y();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_z();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.w()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_w();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_x();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_y();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_z();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.w()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_w();
+
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctarank();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctarank();
+  // CHECK: call i1 @llvm.nvvm.is_explicit_cluster()
+  out[i++] = __nvvm_is_explicit_cluster();
+
+  auto * sptr = (__attribute__((address_space(3))) void *)ptr;
+  // CHECK: call ptr @llvm.nvvm.mapa(ptr %{{.*}}, i32 %{{.*}})
+  out[i++] = (long) __nvvm_mapa(ptr, u);
+  // CHECK: call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
+  out[i++] = (long) __nvvm_mapa_shared_cluster(sptr, u);
+  // CHECK: call i32 @llvm.nvvm.getctarank(ptr {{.*}})
+  out[i++] = __nvvm_getctarank(ptr);
+  // CHECK: call i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) {{.*}})
+  out[i++] = __nvvm_getctarank_shared_cluster(sptr);
+
+  // CHECK: ret void
+}


        


More information about the cfe-commits mailing list