[clang] [llvm] [mlir] [WIP][CUDA] update default CUDA sm ver to 75 (PR #170679)
Rafał Rudnicki via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 07:47:08 PST 2025
https://github.com/bratpiorka updated https://github.com/llvm/llvm-project/pull/170679
>From c0b4ba3ae32f436be7f5dd8c0ad1c0943965f008 Mon Sep 17 00:00:00 2001
From: Rafal Rudnicki <rafal.rudnicki at intel.com>
Date: Thu, 4 Dec 2025 09:53:57 +0100
Subject: [PATCH 1/4] bump default SM for CUDA to 75
---
clang/include/clang/Basic/OffloadArch.h | 2 +-
mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h | 4 ++--
mlir/include/mlir/Dialect/GPU/Transforms/Passes.td | 4 ++--
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 8 ++++----
mlir/test/python/dialects/gpu/dialect.py | 4 ++--
5 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h
index befb69ff22d49..08e50698d72ef 100644
--- a/clang/include/clang/Basic/OffloadArch.h
+++ b/clang/include/clang/Basic/OffloadArch.h
@@ -113,7 +113,7 @@ enum class OffloadArch {
BMG_G21,
LAST,
- CudaDefault = OffloadArch::SM_52,
+ CudaDefault = OffloadArch::SM_75,
HIPDefault = OffloadArch::GFX906,
};
diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
index 34c85de3418ec..f27c9048c63e9 100644
--- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
@@ -28,11 +28,11 @@ struct GPUToNVVMPipelineOptions
llvm::cl::init("nvptx64-nvidia-cuda")};
PassOptions::Option<std::string> cubinChip{
*this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."),
- llvm::cl::init("sm_50")};
+ llvm::cl::init("sm_75")};
PassOptions::Option<std::string> cubinFeatures{
*this, "cubin-features",
llvm::cl::desc("Features to use to serialize to cubin."),
- llvm::cl::init("+ptx60")};
+ llvm::cl::init("+ptx63")};
PassOptions::Option<std::string> cubinFormat{
*this, "cubin-format",
llvm::cl::desc("Compilation format to use to serialize to cubin."),
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
index 0c8a0c7a677ab..5a3e50cb1b6b4 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -143,10 +143,10 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
/*default=*/ "\"nvptx64-nvidia-cuda\"",
"Target triple.">,
Option<"chip", "chip", "std::string",
- /*default=*/"\"sm_50\"",
+ /*default=*/"\"sm_75\"",
"Target chip.">,
Option<"features", "features", "std::string",
- /*default=*/"\"+ptx60\"",
+ /*default=*/"\"+ptx63\"",
"Target features.">,
Option<"optLevel", "O", "unsigned",
/*default=*/"2",
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index c4eb4872af2c6..d323aa08274cc 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -5849,8 +5849,8 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target",
let parameters = (ins
DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple,
- StringRefParameter<"Target chip.", "\"sm_50\"">:$chip,
- StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features,
+ StringRefParameter<"Target chip.", "\"sm_75\"">:$chip,
+ StringRefParameter<"Target chip features.", "\"+ptx63\"">:$features,
OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link,
DefaultValuedParameter<"bool", "true", "Perform SM version check on Ops.">:$verifyTarget
@@ -5861,8 +5861,8 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target",
let builders = [
AttrBuilder<(ins CArg<"int", "2">:$optLevel,
CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple,
- CArg<"StringRef", "\"sm_50\"">:$chip,
- CArg<"StringRef", "\"+ptx60\"">:$features,
+ CArg<"StringRef", "\"sm_75\"">:$chip,
+ CArg<"StringRef", "\"+ptx63\"">:$features,
CArg<"DictionaryAttr", "nullptr">:$targetFlags,
CArg<"ArrayAttr", "nullptr">:$linkFiles,
CArg<"bool", "true">:$verifyTarget), [{
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 1a009b7dfa30d..ba1746c402743 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -51,10 +51,10 @@ def testObjectAttr():
print(o)
object = (
- b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50"
+ b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.3\n.target sm_75"
)
o = gpu.ObjectAttr.get(target, format, object)
- # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
+ # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.3\0A.target sm_75">
print(o)
assert o.object == object
>From cf28420e72d39d74b404a92675213fd0abf02a1e Mon Sep 17 00:00:00 2001
From: Rafal Rudnicki <rafal.rudnicki at intel.com>
Date: Thu, 4 Dec 2025 09:55:03 +0100
Subject: [PATCH 2/4] make CUDA 10.0 the default toolkit for tests
---
.../CUDA/v10.0/bin/.keep | 0
.../CUDA/v10.0/bin/version.txt | 2 ++
.../CUDA/v10.0/include/.keep | 0
.../CUDA/v10.0/lib/.keep | 0
.../CUDA/v10.0/nvvm/libdevice/libdevice.10.bc | 0
.../nvvm/libdevice/libdevice.compute_30.10.bc | 0
.../nvvm/libdevice/libdevice.compute_35.10.bc | 0
.../nvvm/libdevice/libdevice.compute_50.10.bc | 0
.../CUDA/v10.0/version.txt | 2 ++
.../Inputs/CUDA/usr/local/cuda/bin/fatbinary | 0
.../Inputs/CUDA/usr/local/cuda/include/cuda.h | 7 +++++++
.../local/cuda/nvvm/libdevice/libdevice.10.bc | 0
.../nvvm/libdevice/libdevice.compute_20.10.bc | 0
.../nvvm/libdevice/libdevice.compute_50.10.bc | 0
.../Inputs/CUDA_100/usr/local/cuda/bin/.keep | 0
.../Inputs/CUDA_100/usr/local/cuda/include/.keep | 0
.../CUDA_100/usr/local/cuda/include/cuda.h | 7 +++++++
.../Inputs/CUDA_100/usr/local/cuda/lib/.keep | 0
.../Inputs/CUDA_100/usr/local/cuda/lib64/.keep | 0
.../local/cuda/nvvm/libdevice/libdevice.10.bc | 0
.../nvvm/libdevice/libdevice.compute_20.10.bc | 0
.../nvvm/libdevice/libdevice.compute_30.10.bc | 0
.../nvvm/libdevice/libdevice.compute_35.10.bc | 0
.../nvvm/libdevice/libdevice.compute_50.10.bc | 0
.../Inputs/CUDA_70/usr/local/cuda/bin/.keep | 0
.../Inputs/CUDA_70/usr/local/cuda/include/.keep | 0
.../Inputs/CUDA_70/usr/local/cuda/include/cuda.h | 7 +++++++
.../Inputs/CUDA_70/usr/local/cuda/lib/.keep | 0
.../Inputs/CUDA_70/usr/local/cuda/lib64/.keep | 0
.../nvvm/libdevice/libdevice.compute_20.10.bc | 0
.../nvvm/libdevice/libdevice.compute_30.10.bc | 0
.../nvvm/libdevice/libdevice.compute_35.10.bc | 0
.../nvvm/libdevice/libdevice.compute_50.10.bc | 0
clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o | Bin 0 -> 3736 bytes
34 files changed, 25 insertions(+)
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/version.txt
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_30.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_35.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_50.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/version.txt
create mode 100755 clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary
create mode 100644 clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h
create mode 100644 clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
create mode 100644 clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/.keep b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/version.txt b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/version.txt
new file mode 100644
index 0000000000000..16a5a2337f83f
--- /dev/null
+++ b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/version.txt
@@ -0,0 +1,2 @@
+CUDA Version 10.0.130
+
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include/.keep b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/.keep b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_30.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_35.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_50.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/version.txt b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/version.txt
new file mode 100644
index 0000000000000..16a5a2337f83f
--- /dev/null
+++ b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/version.txt
@@ -0,0 +1,2 @@
+CUDA Version 10.0.130
+
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary
new file mode 100755
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h
new file mode 100644
index 0000000000000..c576bebd470dc
--- /dev/null
+++ b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 10000
+
+//
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h
new file mode 100644
index 0000000000000..c576bebd470dc
--- /dev/null
+++ b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 10000
+
+//
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h
new file mode 100644
index 0000000000000..558f2e2d02093
--- /dev/null
+++ b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 7000
+
+//
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o b/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o
new file mode 100644
index 0000000000000000000000000000000000000000..bfac6d6de63cc8fc8da5cc948350dcae84d15ec6
GIT binary patch
literal 3736
zcmcf^ZA at F&^%;YoN!({Ap)GD~_c^eLwlSDTLTqd%e<TiTY2$oN*+gah8N6WguwRUE
ztJcOek7}0YMMy(crWq~jvZ<QL6iu0wO~9knrB>>qsX{G6c4G*sXhPGhs<cSmxyD`z
zOQrpocBFgHJ?EbD?z!j7*RLP$v{dkTSQ&WO53%p4G8e<fwtu%a6qg1}1d^vZ-duj~
z at B_qYo_v`L4mx{PE_cZ5boY84s-C_egJI5IyZ;OsatDImK0irOjp|1A881UN^e_w^
zY-?%)qUW5W(b?x~as=!Q?d=PkwKw(lhJ1}kj#9T$R4e85*r}ES%?H&7w9RgZUE8c_
z)jHJF0XOAv9&|Rl?azP_HAUMnj2AhM at xl_`ZV at g_!i8kOM+1QvO-4d;zQX*N;CT>0
z^!LC~;8$W_e*36xK34G`?3aPFa}&RK7VyZDB+__D%Vslo8eckMJ8H3X9yK1anNGcQ
zsPnMRhFqr#Yg%os`&H-sXZ?KxepRp6e=e*#<3EQrGVU;gA&>doeptBNH26D+G43E^
z3;HzbMkh^UKD*bCHTwG)cjJ+lPN{;7-FX&kM1Hl|T`ma68LTnj?zK0%du*P7-RH&{
zgG_+2I{***5dJ>vigEUbXeJEi`9ofp*RFD&bJ<lvpY7lQ%w(QiIsl=lg4;So>iQRd
z^0&8r|Gav1ce?%%kE+6yB8U*UVS9zL=#4!fhsWxEX-rrCNFbrK7kI;>A5;mYiprlm
zqN2kUrVHCmE(ukm5!Sx2jiN?6CR|GM`H{*aGRjonpBC<@f5vb=A$fsExrT)oWMrLr
zWEqo6O_kDT8=g%b>muv^VmL37a`*?2OQ}Sn9+y&%ecE(#M_qd72qABC9XgVh>@iy^
zq)%aQl3*9uv&T~@`IBkreaUf{lKs=xj&0R@&9hY|YOhfARvi|9zhZpbM}|omuL0Y&
zcVGe!r|?%N_-#G>feDr&Sw_gV*0K?jr57gf<*Z=68;_{*Knf3%xGzbB40xm)XR?CZ
z5<C#aBgGl`0V^I#;b_hZzLubMe>d*y7K0X2&kF7onTQ1S)Q;(bHS*D?>rehw|GU?3
z`DB>`=Q;M|<iU4LaD@$!U{8}*jH$~p3^P0-#89?ViZO|FM`tooz4yp$rTO{l8k07G
z)fjnaDeDo~zfy}~UjS|qMrAX3nWu)=K4R7ka|u%v9}MHcv3}7CCjLS~+>a71B9mfz
zN`7Np=22G_#p1g&{PwiDJ;86!gcA&nzaqp3J6M{=BQ!r0W@(a at W_A#3Ng}Tn`_<x`
z>%_X1Xpzjwmz1&vrF`11 at TlL^U61RsX6;qJ{(4+L%-3bsYeL=lD?RwZ9yS1Tl%<m_
z-HnGD at X$9^@Y#VVYZJ2lwQOLF9UP18yR%N*$r1mUB-Ye?M|xaV7?)*=^P*igr<Bda
z<a2hx$4Rp`ZPtzz=WCp1)T|ld>qfaIIP}4g?(clv1SfjW%hIRW03By~@NhTF)Ur&9
zmEJfZzJrPHS&2fDxM>jQNn#CdK{m)2rxeph#k^9s7*j0fWQDx^bDqLedrUK7Zo3%Q
zWvun3i*&OCVLUu`tn+?Sj69(DnPSc^TUE-JrsS{j6v$wueBCZM`9 at rm;dB$`c5ujp
zX??-08v&9_yt at r<$_{`%LOjxd2gYJuHzmZ{B(ZEIzEams%^MYk{F4iL`4Sk=A=OR9
z+porV4|AG?S)1YX*QT^-4(_PWbX at Hh;$T%jIPf$p{dAUC>lS~J6ReZsWwm&j6n||c
z{1W1(ngH^voZxnjC|HSa)ODY&*=5r)`9l85HKlw-DW6fwA%d&Qr#<Aab=S at 9*ZA7e
zxHfb8Iz%5X8v#2z*a4cAesGJps}|>T#BGE4b_!16c>LgH9PVNpgH%BtX0i~<*sxKX
z<G^+85UGq|CMBO6m*<R%r74-G-l-<?D86F&$g<k3jGwE;gB`3b#l9K^-7BL!eS*`E
za+<8U4bmwC9usvaA2-<*VCs!2`SDndr}K=OxJ!!f<^*@G;```R#BxrsLW=WQxa at BY
z at lX#wC}gF_pSs5BAP+B at H5u?BUz?2UGkiTbbCeTV9-D>Kg^U`Py%tm4<3Su}uLvLm
z;X4qEmYvWFXsGhb_+SHsZ0f2}Gr`wg=QIy0+OKnMqhMQdv0IT-$_rD9#b^zr!IDui
z9aGHb<%>`by?tu&dXl);E%syE^BcCol}Yg>D*@$m4=OQ9+|3bRVb~yvVZBhh7Ze?&
zc|;_wcDd5;ld(i4WwBUx#Z0N>mosUh)^tAM7+Muz*fy8y#WvB0`~hc=JzxuZBW`~v
z<WmLQo?f?;QF)liLo^+5J5|B{0JG_*zWCdrqWipl#vS;-{o2E6eLlaJhMzY-Ln7->
zcztxQn>0W79C^}YG-6nt(5N1g2<wDXfFwXQ4 at qk4gzq+Usmc#T{JQ-uqD(fUZWyMx
z$(kFYq0chpz7IvQW^*#5CZ+p1x+g2*{%oXnj-?U{3u~WreX^APd||FDvoL*Q#qvz5
z<E_gVmvVE9vkP4-x%ooZ+FW5}ereHC{RbDK*Sg+b`n)hZXQ_VYWKn%SJ-4`Oss7`u
zMQW~4SX}B_otd7tRJUTkho0L6aofHv&;Y#*kDR8*png#5YqQ+YifnfLu~o>4DO6?~
zJ>rS|H at k^jR%kuq0%}0wTUO%!L^Oty40XmZ3ssH!!fE;<r at 6}0U5dAlausKx$;uY<
zvPDu>u%mf3rkJ+Nvg5M!Mlsu`c;fqzWKa)LV*AKOQB{4Yff8DPM#(`*>!5=`e^2vu
z`x>DB=izJc%KAuxVUSW7p3z^5>n=hg<+rEedT6Ri4z{s=a^v+w5t2QXKqGL&Ti|KH
zJNcvQh~~pUGzFs*2GTnX0|CN_rV#;f48|@PE*LvuAUy=-eY8gMWhn0>{4Id6eq-ll
zut5n+u;FXOvHi4JW3vTmZy;prbN1Tk03 at 5odaA{Ef_lLj`R)n(-#$$r at l<MJe00;p
z at 3jqM{wMP<;48?JTOnF<$H!YXHhT$e9fW~+D4LtWP$J6j+VY;HyeMG{;L6T#`_x<b
zM at w}1onGE9pI^r9uz}WF^_?Y!vt`8+F6))C1Ni0jX3-m)iB0RRvTtnY)9^@r2(l$g
z^AHy+zN$8}qClVwH^A5oJ*9)7*o#mNGx9Ndl_4ne1F+_U{{Tf{{2P9}#UP}QV5|MV
z0^SzUqMWcK*j=PH^iZB`J^werD~U!+qD53bUtd1Bw7K<sL_bRZLg|0xqvSWA^Dicp
B-+TZ7
literal 0
HcmV?d00001
>From 78e725d44c982b704aaf6eaf0ee6dee7f15dab28 Mon Sep 17 00:00:00 2001
From: Rafal Rudnicki <rafal.rudnicki at intel.com>
Date: Thu, 4 Dec 2025 16:27:05 +0100
Subject: [PATCH 3/4] x
---
clang/tools/clang-repl/ClangRepl.cpp | 2 +-
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 4 ++--
llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/clang/tools/clang-repl/ClangRepl.cpp b/clang/tools/clang-repl/ClangRepl.cpp
index 066f526cba9ae..a70f6a3ded174 100644
--- a/clang/tools/clang-repl/ClangRepl.cpp
+++ b/clang/tools/clang-repl/ClangRepl.cpp
@@ -297,7 +297,7 @@ int main(int argc, const char **argv) {
CB.SetCudaSDK(CudaPath);
if (OffloadArch.empty()) {
- OffloadArch = "sm_35";
+ OffloadArch = "sm_75";
}
CB.SetOffloadArch(OffloadArch);
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index f11d331862081..0ce1df8ce5a42 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -228,9 +228,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
return getFullSmVersion() % 10 == 2 ? PTXVersion >= 88
: hasArchAccelFeatures();
}
- // If the user did not provide a target we default to the `sm_30` target.
+ // If the user did not provide a target we default to the `sm_75` target.
std::string getTargetName() const {
- return TargetName.empty() ? "sm_30" : TargetName;
+ return TargetName.empty() ? "sm_75" : TargetName;
}
bool hasTargetName() const { return !TargetName.empty(); }
diff --git a/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml b/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml
index 703c93b24dcc0..0b535627eee2b 100644
--- a/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml
+++ b/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml
@@ -20,11 +20,11 @@ Members:
- Key: "triple"
Value: "nvptx64-nvidia-cuda"
- Key: "arch"
- Value: "sm_52"
+ Value: "sm_75"
- ImageKind: IMG_None
OffloadKind: OFK_None
String:
- Key: "triple"
Value: "nvptx64-nvidia-cuda"
- Key: "arch"
- Value: "sm_70"
+ Value: "sm_75"
>From afedfa5584c0fca36d6b1ce3d8ca85d0bec765dd Mon Sep 17 00:00:00 2001
From: Rafal Rudnicki <rafal.rudnicki at intel.com>
Date: Thu, 4 Dec 2025 09:56:05 +0100
Subject: [PATCH 4/4] update tests to use CUDA sm_75 / ptx63 as default
---
clang/test/Driver/cuda-detect.cu | 35 +++++++++++--------
.../Driver/cuda-flush-denormals-to-zero.cu | 8 ++---
clang/test/Driver/cuda-march.cu | 6 ++--
clang/test/Driver/cuda-options.cu | 6 ++--
clang/test/Driver/cuda-ptxas-path.cu | 4 +--
clang/test/Driver/cuda-short-ptr.cu | 2 +-
clang/test/Driver/cuda-version-check.cu | 20 +++++------
clang/test/Driver/cuda-windows.cu | 2 +-
clang/test/Driver/lto.cu | 2 +-
9 files changed, 47 insertions(+), 38 deletions(-)
diff --git a/clang/test/Driver/cuda-detect.cu b/clang/test/Driver/cuda-detect.cu
index 23b6ba2fcc09d..66e1a25e70eda 100644
--- a/clang/test/Driver/cuda-detect.cu
+++ b/clang/test/Driver/cuda-detect.cu
@@ -60,7 +60,7 @@
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
-// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30
+// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
@@ -96,14 +96,14 @@
// Verify that -nocudainc prevents adding include path to CUDA headers.
-// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
-// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
-// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
+// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
+// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
-// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
+// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
// We should not add any CUDA include paths if there's no valid CUDA installation
// RUN: not %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
@@ -123,10 +123,10 @@
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
// Verify that -nocudalib prevents linking libdevice bitcode in.
-// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON
-// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON
@@ -152,10 +152,10 @@
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix CUDA80
-// Verify that if no version file is found, we report the default of 7.0.
+// Verify that if no version file is found, we report 10.0 as detected from the CUDA_VERSION macro in cuda.h.
// RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
-// RUN: | FileCheck %s -check-prefix CUDA70
+// RUN: | FileCheck %s -check-prefix CUDA100
// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
// NO-LIBDEVICE: Found CUDA installation: {{.*}}/Inputs/CUDA-nolibdevice/usr/local/cuda
@@ -174,6 +174,7 @@
// LIBDEVICE50-SAME: libdevice.compute_50.10.bc
// PTX42-SAME: "-target-feature" "+ptx42"
// PTX60-SAME: "-target-feature" "+ptx60"
+// PTX63-SAME: "-target-feature" "+ptx63"
// CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h"
// NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h"
// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include"
@@ -188,14 +189,20 @@
// CHECK-CXXINCLUDE-SAME: {{.*}}"-internal-isystem" "{{.+}}/include/c++/4.8"
// CHECK-CXXINCLUDE: ld{{.*}}"
+// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA70-SAME: -target-sdk-version=7.0
+// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CUDA70-SAME: -target-sdk-version=7.0
+// CUDA70: ld{{.*}}"
+
// CUDA80: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// CUDA80-SAME: -target-sdk-version=8.0
// CUDA80: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// CUDA80-SAME: -target-sdk-version=8.0
// CUDA80: ld{{.*}}"
-// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// CUDA70-SAME: -target-sdk-version=7.0
-// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
-// CUDA70-SAME: -target-sdk-version=7.0
-// CUDA70: ld{{.*}}"
+// CUDA100: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA100-SAME: -target-sdk-version=10.0
+// CUDA100: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CUDA100-SAME: -target-sdk-version=10.0
+// CUDA100: ld{{.*}}"
diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
index ea808f2302fbb..adad6dfe632d3 100644
--- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -2,14 +2,14 @@
// -fgpu-flush-denormals-to-zero. This should be translated to
// -fdenormal-fp-math-f32=preserve-sign
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// Test alias options -f[no-]cuda-flush-denormals-to-zero
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
// Test explicit argument, with CUDA offload kind
// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
diff --git a/clang/test/Driver/cuda-march.cu b/clang/test/Driver/cuda-march.cu
index 2dbb9cdf6f589..7684b1df0d685 100644
--- a/clang/test/Driver/cuda-march.cu
+++ b/clang/test/Driver/cuda-march.cu
@@ -5,12 +5,12 @@
// RUN: %clang -### --target=x86_64-linux-gnu -c \
// RUN: -nogpulib -nogpuinc -march=haswell %s 2>&1 | FileCheck %s
// RUN: %clang -### --target=x86_64-linux-gnu -c \
-// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_52 %s 2>&1 | FileCheck %s
+// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_75 %s 2>&1 | FileCheck %s
// CHECK: "-cc1"{{.*}} "-triple" "nvptx
-// CHECK-SAME: "-target-cpu" "sm_52"
+// CHECK-SAME: "-target-cpu" "sm_75"
// CHECK: ptxas
-// CHECK-SAME: "--gpu-name" "sm_52"
+// CHECK-SAME: "--gpu-name" "sm_75"
// CHECK: "-cc1"{{.*}} "-target-cpu" "haswell"
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index fc8e83a2bb279..312556707ef19 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -104,12 +104,12 @@
// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
// c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
-// we default to sm_52 -- same as if no --cuda-gpu-arch were passed.
+// we default to sm_75 -- same as if no --cuda-gpu-arch were passed.
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \
// RUN: --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \
// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
+// RUN: | FileCheck -check-prefixes ARCH-SM75,NOARCH-SM60,NOARCH-SM70 %s
// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
@@ -193,6 +193,8 @@
// NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60"
// ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70"
// NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70"
+// ARCH-SM75: "-cc1"{{.*}}"-target-cpu" "sm_75"
+// NOARCH-SM75-NOT: "-cc1"{{.*}}"-target-cpu" "sm_75"
// ARCHALLERROR: error: unsupported CUDA gpu architecture: all
// Match device-side preprocessor and compiler phases with -save-temps.
diff --git a/clang/test/Driver/cuda-ptxas-path.cu b/clang/test/Driver/cuda-ptxas-path.cu
index f36dcc94558f1..7027984d07b2e 100644
--- a/clang/test/Driver/cuda-ptxas-path.cu
+++ b/clang/test/Driver/cuda-ptxas-path.cu
@@ -1,8 +1,8 @@
// RUN: %clang -### --target=i386-unknown-linux \
-// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \
+// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --ptxas-path=/some/path/to/ptxas %s 2>&1 \
// RUN: | FileCheck %s
// CHECK-NOT: "ptxas"
// CHECK: "/some/path/to/ptxas"
-// CHECK-SAME: "--gpu-name" "sm_52"
+// CHECK-SAME: "--gpu-name" "sm_75"
diff --git a/clang/test/Driver/cuda-short-ptr.cu b/clang/test/Driver/cuda-short-ptr.cu
index e0ae4505e0b56..bf3c1c168b922 100644
--- a/clang/test/Driver/cuda-short-ptr.cu
+++ b/clang/test/Driver/cuda-short-ptr.cu
@@ -1,6 +1,6 @@
// Checks that cuda compilation does the right thing when passed -fcuda-short-ptr
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck %s
// CHECK: "-mllvm" "--nvptx-short-ptr"
// CHECK-SAME: "-fcuda-short-ptr"
diff --git a/clang/test/Driver/cuda-version-check.cu b/clang/test/Driver/cuda-version-check.cu
index 9eceb928ffabd..4b43012b39483 100644
--- a/clang/test/Driver/cuda-version-check.cu
+++ b/clang/test/Driver/cuda-version-check.cu
@@ -1,4 +1,4 @@
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=OK
// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=OK
@@ -15,33 +15,33 @@
// RUN: --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION_CXX
-// The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60.
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// The installation at Inputs/CUDA_70 is CUDA 7.0, which doesn't support sm_60.
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60
// This should only complain about sm_60, not sm_35.
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \
-// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=OK_SM35
// We should get two errors here, one for sm_60 and one for sm_61.
// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \
-// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=ERR_SM61
// We should still get an error if we pass -nocudainc, because this compilation
// would invoke ptxas, and we do a version check on that, too.
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60
// If with -nocudainc and -E, we don't touch the CUDA install, so we
// shouldn't get an error.
// RUN: %clang --target=x86_64-linux -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \
-// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=OK
// --no-cuda-version-check should suppress all of these errors.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 \
// RUN: --no-cuda-version-check %s | \
// RUN: FileCheck %s --check-prefix=OK
@@ -49,9 +49,9 @@
// therefore we should not get an error in host-only mode. We use the -S here
// to avoid the error being produced in case by the assembler tool, which does
// the same check.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=OK
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \
// RUN: FileCheck %s --check-prefix=ERR_SM60
// OK-NOT: error: GPU arch
diff --git a/clang/test/Driver/cuda-windows.cu b/clang/test/Driver/cuda-windows.cu
index 4459e809072d9..64d75ddc223b4 100644
--- a/clang/test/Driver/cuda-windows.cu
+++ b/clang/test/Driver/cuda-windows.cu
@@ -3,7 +3,7 @@
// RUN: %clang -v --target=i386-pc-windows-mingw32 \
// RUN: --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
-// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0
+// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0
// CHECK: "-cc1" "-triple" "nvptx-nvidia-cuda"
// CHECK-SAME: "-fms-extensions"
// CHECK-SAME: "-fms-compatibility"
diff --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu
index 596e6cfe07379..e4a773b487c6c 100644
--- a/clang/test/Driver/lto.cu
+++ b/clang/test/Driver/lto.cu
@@ -26,7 +26,7 @@
// llvm-bc and llvm-ll outputs need to match regular suffixes
// (unfortunately).
-// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t
+// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA/usr/local/cuda -### 2> %t
// RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s
//
// CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu"
More information about the llvm-commits mailing list