[llvm] 616979e - [NVPTX] Add support for PTX 8.6 and CUDA 12.6 (12.8) (#123398)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 21 02:00:27 PST 2025
Author: Sergey Kozub
Date: 2025-01-21T11:00:24+01:00
New Revision: 616979ebd7dc9ae63522788750ea3dc6a96aa69f
URL: https://github.com/llvm/llvm-project/commit/616979ebd7dc9ae63522788750ea3dc6a96aa69f
DIFF: https://github.com/llvm/llvm-project/commit/616979ebd7dc9ae63522788750ea3dc6a96aa69f.diff
LOG: [NVPTX] Add support for PTX 8.6 and CUDA 12.6 (12.8) (#123398)
Add CUDA versions 12.7, 12.8, and 12.9, which support PTX 8.6+ (this enables the use of Blackwell-specific instructions).
Added:
Modified:
clang/include/clang/Basic/BuiltinsNVPTX.def
clang/include/clang/Basic/Cuda.h
clang/lib/Basic/Cuda.cpp
clang/lib/Basic/Targets/NVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/lib/Driver/ToolChains/Cuda.cpp
clang/test/Misc/target-invalid-cpu-note/nvptx.c
llvm/lib/Target/NVPTX/NVPTX.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 969dd9e41ebfa3..37b4e6ff77fda6 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -28,7 +28,9 @@
#pragma push_macro("SM_90")
#pragma push_macro("SM_90a")
#pragma push_macro("SM_100")
-#define SM_100 "sm_100"
+#pragma push_macro("SM_100a")
+#define SM_100a "sm_100a"
+#define SM_100 "sm_100|" SM_100a
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a "|" SM_100
#define SM_89 "sm_89|" SM_90
@@ -1091,6 +1093,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("SM_90")
#pragma pop_macro("SM_90a")
#pragma pop_macro("SM_100")
+#pragma pop_macro("SM_100a")
#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index c2a4addf488df1..1cdfc8178db843 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -44,9 +44,12 @@ enum class CudaVersion {
CUDA_124,
CUDA_125,
CUDA_126,
+ CUDA_127,
+ CUDA_128,
+ CUDA_129,
FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
- CUDA_126, // Partially supported. Proceed with a warning.
+ CUDA_129, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
@@ -80,6 +83,7 @@ enum class OffloadArch {
SM_90,
SM_90a,
SM_100,
+ SM_100a,
GFX600,
GFX601,
GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index d56609a2a8f24a..b1461429d4f51a 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -44,6 +44,9 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 4),
CUDA_ENTRY(12, 5),
CUDA_ENTRY(12, 6),
+ CUDA_ENTRY(12, 7),
+ CUDA_ENTRY(12, 8),
+ CUDA_ENTRY(12, 9),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
@@ -98,6 +101,7 @@ static const OffloadArchToStringMap arch_names[] = {
SM(90), // Hopper
SM(90a), // Hopper
SM(100), // Blackwell
+ SM(100a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
@@ -227,8 +231,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
case OffloadArch::SM_90a:
return CudaVersion::CUDA_120;
case OffloadArch::SM_100:
- return CudaVersion::NEW; // TODO: use specific CUDA version once it's
- // public.
+ case OffloadArch::SM_100a:
+ return CudaVersion::CUDA_127;
default:
llvm_unreachable("invalid enum");
}
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index dbc3fec3657610..56efad90cb7c84 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -285,6 +285,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_90a:
return "900";
case OffloadArch::SM_100:
+ case OffloadArch::SM_100a:
return "1000";
}
llvm_unreachable("unhandled OffloadArch");
@@ -292,6 +293,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
if (GPU == OffloadArch::SM_90a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
+ if (GPU == OffloadArch::SM_100a)
+ Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
}
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 87c3635ed3f70e..c13928f61a7481 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2277,6 +2277,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
case OffloadArch::SM_100:
+ case OffloadArch::SM_100a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 8967115bcc73d9..27e1969dabe551 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -89,6 +89,12 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_125;
if (raw_version < 12070)
return CudaVersion::CUDA_126;
+ if (raw_version < 12080)
+ return CudaVersion::CUDA_127;
+ if (raw_version < 12090)
+ return CudaVersion::CUDA_128;
+ if (raw_version < 12100)
+ return CudaVersion::CUDA_129;
return CudaVersion::NEW;
}
@@ -682,6 +688,9 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
+ CASE_CUDA_VERSION(129, 87);
+ CASE_CUDA_VERSION(128, 87);
+ CASE_CUDA_VERSION(127, 86);
CASE_CUDA_VERSION(126, 85);
CASE_CUDA_VERSION(125, 85);
CASE_CUDA_VERSION(124, 84);
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 3ea6c02d6b3846..3afcdf8c9fe5c7 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -27,6 +27,7 @@
// CHECK-SAME: {{^}}, sm_90
// CHECK-SAME: {{^}}, sm_90a
// CHECK-SAME: {{^}}, sm_100
+// CHECK-SAME: {{^}}, sm_100a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 9af8715ef52ae7..3ca8b4d294079c 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -39,6 +39,7 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
def SM90a: FeatureSM<"90a", 901>;
+def SM100a: FeatureSM<"100a", 1001>;
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78,
@@ -74,6 +75,7 @@ def : Proc<"sm_89", [SM89, PTX78]>;
def : Proc<"sm_90", [SM90, PTX78]>;
def : Proc<"sm_90a", [SM90a, PTX80]>;
def : Proc<"sm_100", [SM100, PTX86]>;
+def : Proc<"sm_100a", [SM100a, PTX86]>;
def NVPTXInstrInfo : InstrInfo {
}
More information about the llvm-commits mailing list