[llvm-branch-commits] [clang] b84ffb9 - [CUDA] Add support for sm101 and sm120 target architectures (#127187)
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 21 14:07:16 PST 2025
Author: Sebastian Jodłowski
Date: 2025-02-21T14:06:54-08:00
New Revision: b84ffb9f3b349dd4548a9d3c0ead91021b7905a3
URL: https://github.com/llvm/llvm-project/commit/b84ffb9f3b349dd4548a9d3c0ead91021b7905a3
DIFF: https://github.com/llvm/llvm-project/commit/b84ffb9f3b349dd4548a9d3c0ead91021b7905a3.diff
LOG: [CUDA] Add support for sm101 and sm120 target architectures (#127187)
Add support for the sm_101 and sm_120 target architectures (and their
sm_101a / sm_120a variants), all of which require CUDA 12.8.
---------
Co-authored-by: Sebastian Jodlowski <sjodlowski at nuro.ai>
(cherry picked from commit 0127f169dc8e0b5b6c2a24f74cd42d9d277916f6)
Added:
Modified:
clang/include/clang/Basic/BuiltinsNVPTX.td
clang/include/clang/Basic/Cuda.h
clang/lib/Basic/Cuda.cpp
clang/lib/Basic/Targets/NVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/test/Misc/target-invalid-cpu-note/nvptx.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 9d24a992563a4..b550fff8567df 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,12 +21,14 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
!strconcat(f, "|", newer.Features));
}
+let Features = "sm_120a" in def SM_120a : SMFeatures;
+let Features = "sm_101a" in def SM_101a : SMFeatures;
let Features = "sm_100a" in def SM_100a : SMFeatures;
-
-def SM_100 : SM<"100", [SM_100a]>;
-
let Features = "sm_90a" in def SM_90a : SMFeatures;
+def SM_120 : SM<"120", [SM_120a]>;
+def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_100 : SM<"100", [SM_100a, SM_101]>;
def SM_90 : SM<"90", [SM_90a, SM_100]>;
def SM_89 : SM<"89", [SM_90]>;
def SM_87 : SM<"87", [SM_89]>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..5c909a8e9ca11 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -82,6 +82,10 @@ enum class OffloadArch {
SM_90a,
SM_100,
SM_100a,
+ SM_101,
+ SM_101a,
+ SM_120,
+ SM_120a,
GFX600,
GFX601,
GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..79cac0ec119dd 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = {
SM(90a), // Hopper
SM(100), // Blackwell
SM(100a), // Blackwell
+ SM(101), // Blackwell
+ SM(101a), // Blackwell
+ SM(120), // Blackwell
+ SM(120a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
@@ -230,6 +234,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
return CudaVersion::CUDA_120;
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
return CudaVersion::CUDA_128;
default:
llvm_unreachable("invalid enum");
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index a03f4983b9d03..9be12cbe7ac19 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -176,7 +176,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
// Set __CUDA_ARCH__ for the GPU specified.
- std::string CUDAArchCode = [this] {
+ llvm::StringRef CUDAArchCode = [this] {
switch (GPU) {
case OffloadArch::GFX600:
case OffloadArch::GFX601:
@@ -283,14 +283,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
return "1000";
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
+ return "1010";
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
+ return "1200";
}
llvm_unreachable("unhandled OffloadArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
- if (GPU == OffloadArch::SM_90a)
- Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
- if (GPU == OffloadArch::SM_100a)
- Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
+ switch(GPU) {
+ case OffloadArch::SM_90a:
+ case OffloadArch::SM_100a:
+ case OffloadArch::SM_101a:
+ case OffloadArch::SM_120a:
+ Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
+ break;
+ default:
+ // Do nothing if this is not an enhanced architecture.
+ break;
+ }
}
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c13928f61a748..dc417880a50e9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2278,6 +2278,10 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_90a:
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 3afcdf8c9fe5c..d8e4d7e63e234 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -28,6 +28,10 @@
// CHECK-SAME: {{^}}, sm_90a
// CHECK-SAME: {{^}}, sm_100
// CHECK-SAME: {{^}}, sm_100a
+// CHECK-SAME: {{^}}, sm_101
+// CHECK-SAME: {{^}}, sm_101a
+// CHECK-SAME: {{^}}, sm_120
+// CHECK-SAME: {{^}}, sm_120a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
More information about the llvm-branch-commits
mailing list