[clang] [CUDA] Add support for sm101 and sm120 target architectures (PR #127187)
Sebastian Jodłowski via cfe-commits
cfe-commits at lists.llvm.org
Mon Feb 17 10:09:24 PST 2025
https://github.com/jodelek updated https://github.com/llvm/llvm-project/pull/127187
From a55c76bac1bd70878c777b5930553fac114d2fd5 Mon Sep 17 00:00:00 2001
From: Sebastian Jodlowski <sjodlowski at nuro.ai>
Date: Fri, 14 Feb 2025 01:34:16 -0800
Subject: [PATCH 1/3] Add support for sm101 target architecture (Tegra Blackwell)
---
clang/include/clang/Basic/BuiltinsNVPTX.td | 4 ++++
clang/include/clang/Basic/Cuda.h | 2 ++
clang/lib/Basic/Cuda.cpp | 4 ++++
clang/lib/Basic/Targets/NVPTX.cpp | 5 +++++
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 2 ++
clang/test/Misc/target-invalid-cpu-note/nvptx.c | 2 ++
6 files changed, 19 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 327dc88cffdb4..3853e7dc8fbaf 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,6 +21,10 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
!strconcat(f, "|", newer.Features));
}
+let Features = "sm_101a" in def SM_101a : SMFeatures;
+
+def SM_101 : SM<"101", [SM_101a]>;
+
let Features = "sm_100a" in def SM_100a : SMFeatures;
def SM_100 : SM<"100", [SM_100a]>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..380f51fed22a2 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -82,6 +82,8 @@ enum class OffloadArch {
SM_90a,
SM_100,
SM_100a,
+ SM_101,
+ SM_101a,
GFX600,
GFX601,
GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..e92a12b3ce3be 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -100,6 +100,8 @@ static const OffloadArchToStringMap arch_names[] = {
SM(90a), // Hopper
SM(100), // Blackwell
SM(100a), // Blackwell
+ SM(101), // Blackwell
+ SM(101a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
@@ -230,6 +232,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
return CudaVersion::CUDA_120;
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
return CudaVersion::CUDA_128;
default:
llvm_unreachable("invalid enum");
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 7d13c1f145440..3f3c1bb653b04 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -292,6 +292,9 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
return "1000";
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
+ return "1010";
}
llvm_unreachable("unhandled OffloadArch");
}();
@@ -300,6 +303,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
if (GPU == OffloadArch::SM_100a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
+ if (GPU == OffloadArch::SM_101a)
+ Builder.defineMacro("__CUDA_ARCH_FEAT_SM101_ALL", "1");
}
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c13928f61a748..2cac1eb73b438 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2278,6 +2278,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_90a:
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
+ case OffloadArch::SM_101:
+ case OffloadArch::SM_101a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 3afcdf8c9fe5c..ed9d4865c3ec9 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -28,6 +28,8 @@
// CHECK-SAME: {{^}}, sm_90a
// CHECK-SAME: {{^}}, sm_100
// CHECK-SAME: {{^}}, sm_100a
+// CHECK-SAME: {{^}}, sm_101
+// CHECK-SAME: {{^}}, sm_101a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
From af2bf2fc8bcaaa8763086504f771b0a70810f2d3 Mon Sep 17 00:00:00 2001
From: Sebastian Jodlowski <sjodlowski at nuro.ai>
Date: Fri, 14 Feb 2025 14:58:59 -0800
Subject: [PATCH 2/3] Add support for sm120 as well.
---
clang/include/clang/Basic/BuiltinsNVPTX.td | 8 ++++++--
clang/include/clang/Basic/Cuda.h | 2 ++
clang/lib/Basic/Cuda.cpp | 4 ++++
clang/lib/Basic/Targets/NVPTX.cpp | 5 +++++
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 2 ++
clang/test/Misc/target-invalid-cpu-note/nvptx.c | 2 ++
6 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 3853e7dc8fbaf..4559e3765a66e 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,13 +21,17 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
!strconcat(f, "|", newer.Features));
}
+let Features = "sm_120a" in def SM_120a : SMFeatures;
+
+def SM_120 : SM<"120", [SM_120a]>;
+
let Features = "sm_101a" in def SM_101a : SMFeatures;
-def SM_101 : SM<"101", [SM_101a]>;
+def SM_101 : SM<"101", [SM_101a, SM_120]>;
let Features = "sm_100a" in def SM_100a : SMFeatures;
-def SM_100 : SM<"100", [SM_100a]>;
+def SM_100 : SM<"100", [SM_100a, SM_101]>;
let Features = "sm_90a" in def SM_90a : SMFeatures;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 380f51fed22a2..5c909a8e9ca11 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -84,6 +84,8 @@ enum class OffloadArch {
SM_100a,
SM_101,
SM_101a,
+ SM_120,
+ SM_120a,
GFX600,
GFX601,
GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index e92a12b3ce3be..79cac0ec119dd 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -102,6 +102,8 @@ static const OffloadArchToStringMap arch_names[] = {
SM(100a), // Blackwell
SM(101), // Blackwell
SM(101a), // Blackwell
+ SM(120), // Blackwell
+ SM(120a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
@@ -234,6 +236,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
case OffloadArch::SM_100a:
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
return CudaVersion::CUDA_128;
default:
llvm_unreachable("invalid enum");
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 3f3c1bb653b04..291257b7b371a 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -295,6 +295,9 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
return "1010";
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
+ return "1200";
}
llvm_unreachable("unhandled OffloadArch");
}();
@@ -305,6 +308,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
if (GPU == OffloadArch::SM_101a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM101_ALL", "1");
+ if (GPU == OffloadArch::SM_120a)
+ Builder.defineMacro("__CUDA_ARCH_FEAT_SM120_ALL", "1");
}
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 2cac1eb73b438..dc417880a50e9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2280,6 +2280,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_100a:
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
+ case OffloadArch::SM_120:
+ case OffloadArch::SM_120a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index ed9d4865c3ec9..d8e4d7e63e234 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -30,6 +30,8 @@
// CHECK-SAME: {{^}}, sm_100a
// CHECK-SAME: {{^}}, sm_101
// CHECK-SAME: {{^}}, sm_101a
+// CHECK-SAME: {{^}}, sm_120
+// CHECK-SAME: {{^}}, sm_120a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
From e01df6d72a5803e4e69c0aaf8b39c090f98e2c8e Mon Sep 17 00:00:00 2001
From: Sebastian Jodlowski <sjodlowski at nuro.ai>
Date: Mon, 17 Feb 2025 10:09:03 -0800
Subject: [PATCH 3/3] Code style fixes - part1.
---
clang/include/clang/Basic/BuiltinsNVPTX.td | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 4559e3765a66e..61e48b31c244b 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -22,19 +22,13 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
}
let Features = "sm_120a" in def SM_120a : SMFeatures;
-
-def SM_120 : SM<"120", [SM_120a]>;
-
let Features = "sm_101a" in def SM_101a : SMFeatures;
-
-def SM_101 : SM<"101", [SM_101a, SM_120]>;
-
let Features = "sm_100a" in def SM_100a : SMFeatures;
-
-def SM_100 : SM<"100", [SM_100a, SM_101]>;
-
let Features = "sm_90a" in def SM_90a : SMFeatures;
+def SM_120 : SM<"120", [SM_120a]>;
+def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_100 : SM<"100", [SM_100a, SM_101]>;
def SM_90 : SM<"90", [SM_90a, SM_100]>;
def SM_89 : SM<"89", [SM_90]>;
def SM_87 : SM<"87", [SM_89]>;
More information about the cfe-commits mailing list