[Openmp-commits] [clang] [flang] [mlir] [openmp] Update the default SM to 7.5 (PR #177469)

Thu Jan 22 22:07:01 PST 2026

https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/177469

>From 16b02ef95155f15cfcd1cca46da6651960b2e1b5 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 22 Jan 2026 21:07:17 +0000
Subject: [PATCH 1/4] Initial commit

---
 clang/include/clang/Basic/OffloadArch.h            | 2 +-
 mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h   | 2 +-
 mlir/include/mlir/Dialect/GPU/Transforms/Passes.td | 2 +-
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h
index 31a56b47cbb29..418b1cd653a56 100644
--- a/clang/include/clang/Basic/OffloadArch.h
+++ b/clang/include/clang/Basic/OffloadArch.h
@@ -117,7 +117,7 @@ enum class OffloadArch {
   BMG_G21,
   LAST,
 
-  CudaDefault = OffloadArch::SM_52,
+  CudaDefault = OffloadArch::SM_75,
   HIPDefault = OffloadArch::GFX906,
 };
 
diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
index 34c85de3418ec..4084d3d9ff329 100644
--- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
@@ -28,7 +28,7 @@ struct GPUToNVVMPipelineOptions
       llvm::cl::init("nvptx64-nvidia-cuda")};
   PassOptions::Option<std::string> cubinChip{
       *this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."),
-      llvm::cl::init("sm_50")};
+      llvm::cl::init("sm_75")};
   PassOptions::Option<std::string> cubinFeatures{
       *this, "cubin-features",
       llvm::cl::desc("Features to use to serialize to cubin."),
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
index 885d14cf645b1..f3c2b9ad830fb 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -143,7 +143,7 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
            /*default=*/ "\"nvptx64-nvidia-cuda\"",
            "Target triple.">,
     Option<"chip", "chip", "std::string",
-           /*default=*/"\"sm_50\"",
+           /*default=*/"\"sm_75\"",
            "Target chip.">,
     Option<"features", "features", "std::string",
            /*default=*/"\"+ptx60\"",
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 64a52acbb2278..cd2a0f021127e 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6373,7 +6373,7 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target",
   let parameters = (ins
     DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
     StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple,
-    StringRefParameter<"Target chip.", "\"sm_50\"">:$chip,
+    StringRefParameter<"Target chip.", "\"sm_75\"">:$chip,
     StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features,
     OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
     OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link,

>From 0c7d3daf0d95ef3ffb3311505fa95f262866c479 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 22 Jan 2026 21:19:45 +0000
Subject: [PATCH 2/4] Fix NVVMOps.td builder

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index cd2a0f021127e..ee7c30a6d4159 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6385,7 +6385,7 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target",
   let builders = [
     AttrBuilder<(ins CArg<"int", "2">:$optLevel,
                      CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple,
-                     CArg<"StringRef", "\"sm_50\"">:$chip,
+                     CArg<"StringRef", "\"sm_75\"">:$chip,
                      CArg<"StringRef", "\"+ptx60\"">:$features,
                      CArg<"DictionaryAttr", "nullptr">:$targetFlags,
                      CArg<"ArrayAttr", "nullptr">:$linkFiles,

>From 434e30cbf5d04b761031d51ead8d7807d76a5a2c Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 22 Jan 2026 21:19:52 +0000
Subject: [PATCH 3/4] Update release notes

---
 clang/docs/ReleaseNotes.rst  | 4 ++++
 flang/docs/ReleaseNotes.md   | 4 ++++
 mlir/docs/ReleaseNotes.md    | 9 +++++++++
 openmp/docs/ReleaseNotes.rst | 6 ++++++
 4 files changed, 23 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 791212dafd342..a30ca3be9a651 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -228,6 +228,10 @@ CUDA/HIP Language Changes
 CUDA Support
 ^^^^^^^^^^^^
 
+- The default CUDA architecture has been changed from ``sm_52`` to ``sm_75``.
+  ``sm_75`` is the oldest GPU variant compatible with the widest range of recent
+  major CUDA Toolkit versions (11/12/13).
+
 AIX Support
 ^^^^^^^^^^^
 
diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md
index 888da4d58b868..2dfe79bfe7014 100644
--- a/flang/docs/ReleaseNotes.md
+++ b/flang/docs/ReleaseNotes.md
@@ -33,6 +33,10 @@ page](https://llvm.org/releases/).
 
 ## Non-comprehensive list of changes in this release
 
+* The default CUDA architecture for GPU offloading has been changed from
+  `sm_52` to `sm_75`. `sm_75` is the oldest GPU variant compatible with the
+  widest range of recent major CUDA Toolkit versions (11/12/13).
+
 ## New Compiler Flags
 
 ## Windows Support
diff --git a/mlir/docs/ReleaseNotes.md b/mlir/docs/ReleaseNotes.md
index ab5e9adaa68bc..16b93c8909670 100644
--- a/mlir/docs/ReleaseNotes.md
+++ b/mlir/docs/ReleaseNotes.md
@@ -8,6 +8,15 @@ specifically, it is a snapshot of the MLIR development at the time of the releas
 
 [TOC]
 
+## LLVM 21
+
+### GPU/NVVM Changes
+
+- The default NVVM target architecture has been changed from `sm_50` to `sm_75`.
+  `sm_75` is the oldest GPU variant compatible with the widest range of recent
+  major CUDA Toolkit versions (11/12/13). This affects the `NVVMTargetAttr`,
+  `GpuNVVMAttachTarget` pass, and the `gpu-lower-to-nvvm-pipeline`.
+
 ## LLVM 20
 
 All the MLIR runners other than `mlir-cpu-runner` have been removed, as their functionality has been merged into it, and it has been renamed to `mlir-runner`.
diff --git a/openmp/docs/ReleaseNotes.rst b/openmp/docs/ReleaseNotes.rst
index 6c1a46caf1d81..4538abdb119d0 100644
--- a/openmp/docs/ReleaseNotes.rst
+++ b/openmp/docs/ReleaseNotes.rst
@@ -20,6 +20,12 @@ from the `LLVM releases web site <https://llvm.org/releases/>`_.
 Non-comprehensive list of changes in this release
 =================================================
 
+CUDA Offloading
+---------------
+- The default CUDA architecture for GPU offloading has been changed from
+  ``sm_52`` to ``sm_75``. ``sm_75`` is the oldest GPU variant compatible with
+  the widest range of recent major CUDA Toolkit versions (11/12/13).
+
 Device Runtime
 --------------
 - Changed the OpenMP DeviceRTL to use 'generic' IR. The

>From 4f9da83aab63ae537487cfcfb803217a92b0e5d8 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Fri, 23 Jan 2026 06:06:43 +0000
Subject: [PATCH 4/4] Test out fixes

---
 clang/test/Driver/clang_f_opts.c            |  2 +-
 clang/test/Driver/cuda-arch-list.cu         | 16 +++++++--------
 clang/test/Driver/cuda-constructor-alias.cu |  2 +-
 clang/test/Driver/cuda-march.cu             | 22 ++++++++++++++-------
 clang/test/Driver/cuda-options.cu           |  6 ++++--
 clang/test/Driver/cuda-ptxas-path.cu        |  4 ++--
 clang/test/Driver/cuda-simple.cu            |  6 +++---
 clang/test/Driver/cuda-windows.cu           |  4 ++--
 clang/test/Driver/fbasic-block-sections.c   |  2 +-
 clang/test/Driver/lto.cu                    |  6 +++---
 clang/test/Driver/opt-record.c              |  4 ++--
 clang/test/Driver/unknown-std.cpp           |  2 +-
 clang/test/Headers/cuda_with_openmp.cu      |  4 ++--
 13 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index 5871f1580d6b7..08bd4b9aa22b1 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -69,7 +69,7 @@
 
 //
 // RUN: %clang -### -x cuda -nocudainc -nocudalib \
-// RUN:    -c -fprofile-sample-use=%S/Inputs/file.prof --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:    -c -fprofile-sample-use=%S/Inputs/file.prof --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \
 // RUN:  | FileCheck -check-prefix=CHECK-CUDA-SAMPLE-PROFILE %s
 // -fprofile-sample-use should not be passed to the GPU compilation
 // CHECK-CUDA-SAMPLE-PROFILE: "-cc1"
diff --git a/clang/test/Driver/cuda-arch-list.cu b/clang/test/Driver/cuda-arch-list.cu
index d4825f202d733..610cc0e20d517 100644
--- a/clang/test/Driver/cuda-arch-list.cu
+++ b/clang/test/Driver/cuda-arch-list.cu
@@ -13,10 +13,10 @@
 // RUN:   --offload-arch=sm_70 --offload-arch=sm_60 %s 2>&1 \
 // RUN: | FileCheck -check-prefixes=DEVICE60-60-70,DEVICE70-60-70,HOST-60-70 %s
 
-// Verify that it works with no explicit arch (defaults to sm_52)
+// Verify that it works with no explicit arch (defaults to sm_75)
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib \
-// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=DEVICE52,HOST52 %s
+// RUN:   --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=DEVICE75,HOST75 %s
 
 // Verify that --no-offload-arch negates preceding --offload-arch
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib \
@@ -46,12 +46,12 @@
 // HOST-60-70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 // HOST-60-70-SAME: "-D__CUDA_ARCH_LIST__=600,700"
 
-// DEVICE52: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// DEVICE52-SAME: "-target-cpu" "sm_52"
-// DEVICE52-SAME: "-D__CUDA_ARCH_LIST__=520"
+// DEVICE75: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// DEVICE75-SAME: "-target-cpu" "sm_75"
+// DEVICE75-SAME: "-D__CUDA_ARCH_LIST__=750"
 
-// HOST52: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
-// HOST52-SAME: "-D__CUDA_ARCH_LIST__=520"
+// HOST75: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// HOST75-SAME: "-D__CUDA_ARCH_LIST__=750"
 
 // DEVICE70-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // DEVICE70-ONLY-SAME: "-target-cpu" "sm_70"
diff --git a/clang/test/Driver/cuda-constructor-alias.cu b/clang/test/Driver/cuda-constructor-alias.cu
index b7867e095f27f..f84bda46f44fc 100644
--- a/clang/test/Driver/cuda-constructor-alias.cu
+++ b/clang/test/Driver/cuda-constructor-alias.cu
@@ -1,7 +1,7 @@
 // Check that we don't pass -mconstructor-aliases to CUDA device-side
 // compilation, but we do pass it to host-side compilation.
 
-// RUN: %clang -### --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s
 // CHECK: "-cc1"
 // CHECK-NOT: "-fcuda-is-device" {{.*}}"-mconstructor-aliases"
 // CHECK-NOT: "-mconstructor-aliases" {{.*}}"-fcuda-is-device"
diff --git a/clang/test/Driver/cuda-march.cu b/clang/test/Driver/cuda-march.cu
index 2dbb9cdf6f589..c2e46bbaceacd 100644
--- a/clang/test/Driver/cuda-march.cu
+++ b/clang/test/Driver/cuda-march.cu
@@ -3,14 +3,22 @@
 // compilation or ptxas!)
 
 // RUN: %clang -### --target=x86_64-linux-gnu -c \
-// RUN: -nogpulib -nogpuinc -march=haswell %s 2>&1 | FileCheck %s
+// RUN: -nogpulib -nogpuinc -march=haswell %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT
 // RUN: %clang -### --target=x86_64-linux-gnu -c \
-// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_52 %s 2>&1 | FileCheck %s
+// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_52 %s 2>&1 | FileCheck %s --check-prefix=CHECK-SM52
 
-// CHECK: "-cc1"{{.*}} "-triple" "nvptx
-// CHECK-SAME: "-target-cpu" "sm_52"
+// CHECK-DEFAULT: "-cc1"{{.*}} "-triple" "nvptx
+// CHECK-DEFAULT-SAME: "-target-cpu" "sm_75"
 
-// CHECK: ptxas
-// CHECK-SAME: "--gpu-name" "sm_52"
+// CHECK-DEFAULT: ptxas
+// CHECK-DEFAULT-SAME: "--gpu-name" "sm_75"
 
-// CHECK: "-cc1"{{.*}} "-target-cpu" "haswell"
+// CHECK-DEFAULT: "-cc1"{{.*}} "-target-cpu" "haswell"
+
+// CHECK-SM52: "-cc1"{{.*}} "-triple" "nvptx
+// CHECK-SM52-SAME: "-target-cpu" "sm_52"
+
+// CHECK-SM52: ptxas
+// CHECK-SM52-SAME: "--gpu-name" "sm_52"
+
+// CHECK-SM52: "-cc1"{{.*}} "-target-cpu" "haswell"
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index fc8e83a2bb279..312556707ef19 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -104,12 +104,12 @@
 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
 
 // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
-//    we default to sm_52 -- same as if no --cuda-gpu-arch were passed.
+//    we default to sm_75 -- same as if no --cuda-gpu-arch were passed.
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \
 // RUN:   --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \
 // RUN:   -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
+// RUN: | FileCheck -check-prefixes ARCH-SM75,NOARCH-SM60,NOARCH-SM70 %s
 
 // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
@@ -193,6 +193,8 @@
 // NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60"
 // ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70"
 // NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70"
+// ARCH-SM75: "-cc1"{{.*}}"-target-cpu" "sm_75"
+// NOARCH-SM75-NOT: "-cc1"{{.*}}"-target-cpu" "sm_75"
 // ARCHALLERROR: error: unsupported CUDA gpu architecture: all
 
 // Match device-side preprocessor and compiler phases with -save-temps.
diff --git a/clang/test/Driver/cuda-ptxas-path.cu b/clang/test/Driver/cuda-ptxas-path.cu
index f36dcc94558f1..b381e29433c7d 100644
--- a/clang/test/Driver/cuda-ptxas-path.cu
+++ b/clang/test/Driver/cuda-ptxas-path.cu
@@ -1,8 +1,8 @@
 // RUN: %clang -### --target=i386-unknown-linux \
-// RUN:   --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \
+// RUN:   --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   --ptxas-path=/some/path/to/ptxas %s 2>&1 \
 // RUN: | FileCheck %s
 
 // CHECK-NOT: "ptxas"
 // CHECK: "/some/path/to/ptxas"
-// CHECK-SAME: "--gpu-name" "sm_52"
+// CHECK-SAME: "--gpu-name" "sm_75"
diff --git a/clang/test/Driver/cuda-simple.cu b/clang/test/Driver/cuda-simple.cu
index 349ee42464cc1..d884024e783af 100644
--- a/clang/test/Driver/cuda-simple.cu
+++ b/clang/test/Driver/cuda-simple.cu
@@ -1,11 +1,11 @@
 // Verify that we can parse a simple CUDA file with or without -save-temps
 // http://llvm.org/PR22936
-// RUN: %clang --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: %clang --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:        -nocudainc -nocudalib -Werror -fsyntax-only %s
 //
 // Verify that we pass -x cuda-cpp-output to compiler after
 // preprocessing a CUDA file
-// RUN: %clang --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: %clang --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:        -Werror -### -save-temps -c %s 2>&1 | FileCheck %s
 // CHECK-LABEL: "-cc1"
 // CHECK: "-E"
@@ -14,7 +14,7 @@
 // CHECK: "-x" "cuda-cpp-output"
 //
 // Verify that compiler accepts CUDA syntax with "-x cuda-cpp-output".
-// RUN: %clang --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: %clang --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:        -Werror -fsyntax-only -x cuda-cpp-output %s
 
 extern "C" int cudaConfigureCall(int, int);
diff --git a/clang/test/Driver/cuda-windows.cu b/clang/test/Driver/cuda-windows.cu
index 4459e809072d9..ddb1825fefd87 100644
--- a/clang/test/Driver/cuda-windows.cu
+++ b/clang/test/Driver/cuda-windows.cu
@@ -1,6 +1,6 @@
-// RUN: %clang -v --target=i386-pc-windows-msvc \
+// RUN: %clang -v --target=i386-pc-windows-msvc --no-cuda-version-check \
 // RUN:   --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
-// RUN: %clang -v --target=i386-pc-windows-mingw32 \
+// RUN: %clang -v --target=i386-pc-windows-mingw32 --no-cuda-version-check \
 // RUN:   --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
 
 // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0
diff --git a/clang/test/Driver/fbasic-block-sections.c b/clang/test/Driver/fbasic-block-sections.c
index 6dfba5f404cee..770207e7fdc6f 100644
--- a/clang/test/Driver/fbasic-block-sections.c
+++ b/clang/test/Driver/fbasic-block-sections.c
@@ -14,7 +14,7 @@
 // RUN: not %clang -### --target=x86_64 -fbasic-block-sections=list %s -S 2>&1 | FileCheck -check-prefix=CHECK-INVALID-VALUE %s
 // RUN: %clang -### --target=x86_64 -fbasic-block-sections=list= %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NULL-LIST %s
 // RUN: %clang -### --target=x86_64 -fbasic-block-sections=none %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NONE %s
-// RUN: %clang -### -x cuda -nocudainc -nocudalib --target=x86_64 -fbasic-block-sections=all --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s -c 2>&1 \
+// RUN: %clang -### -x cuda -nocudainc -nocudalib --target=x86_64 -fbasic-block-sections=all --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s -c 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-CUDA %s
 
 //
diff --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu
index 596e6cfe07379..c5484eac2dc04 100644
--- a/clang/test/Driver/lto.cu
+++ b/clang/test/Driver/lto.cu
@@ -26,7 +26,7 @@
 
 // llvm-bc and llvm-ll outputs need to match regular suffixes
 // (unfortunately).
-// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t
+// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda -### 2> %t
 // RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s
 //
 // CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu"
@@ -61,10 +61,10 @@
 
 // -flto passes along an explicit debugger tuning argument.
 // RUN: %clangxx -nocudainc -nocudalib --no-offload-new-driver \
-// RUN:          --target=x86_64-unknown-linux -### %s -flto -glldb --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2> %t
+// RUN:          --target=x86_64-unknown-linux -### %s -flto -glldb --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2> %t
 // RUN: FileCheck -check-prefix=CHECK-TUNING-LLDB < %t %s
 // RUN: %clangxx -nocudainc -nocudalib --no-offload-new-driver \
-// RUN:          --target=x86_64-unknown-linux -### %s -flto -g --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2> %t
+// RUN:          --target=x86_64-unknown-linux -### %s -flto -g --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2> %t
 // RUN: FileCheck -check-prefix=CHECK-NO-TUNING < %t %s
 //
 // CHECK-TUNING-LLDB:   "-plugin-opt=-debugger-tune=lldb"
diff --git a/clang/test/Driver/opt-record.c b/clang/test/Driver/opt-record.c
index 86d00d5612647..1bd98fe97ad2f 100644
--- a/clang/test/Driver/opt-record.c
+++ b/clang/test/Driver/opt-record.c
@@ -6,8 +6,8 @@
 // RUN: %clang -### -c -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O
 // RUN: %clang -### -save-temps -c -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O
 // RUN: %clang -### -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O
-// RUN: %clang -### -S -fsave-optimization-record -x cuda -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
-// RUN: %clang -### -fsave-optimization-record -x cuda -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
+// RUN: %clang -### -S -fsave-optimization-record -x cuda -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
+// RUN: %clang -### -fsave-optimization-record -x cuda -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
 // RUN: %clang -### -S -o FOO -fsave-optimization-record -foptimization-record-file=BAR.txt %s 2>&1 | FileCheck %s -check-prefix=CHECK-EQ
 // RUN: %clang -### -S -o FOO -foptimization-record-file=BAR.txt %s 2>&1 | FileCheck %s -check-prefix=CHECK-EQ
 // RUN: %clang -### -S -o FOO -foptimization-record-file=BAR.txt -fno-save-optimization-record %s 2>&1 | FileCheck %s --check-prefix=CHECK-FOPT-DISABLE
diff --git a/clang/test/Driver/unknown-std.cpp b/clang/test/Driver/unknown-std.cpp
index 5c58042a0a2c7..aeb793829faa4 100644
--- a/clang/test/Driver/unknown-std.cpp
+++ b/clang/test/Driver/unknown-std.cpp
@@ -4,7 +4,7 @@
 
 // RUN: not %clang %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s
 // RUN: not %clang -x objective-c++ %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s
-// RUN: not %clang -x cuda -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: not %clang -x cuda -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s
 // RUN: not %clang -x hip -nocudainc -nocudalib %s -std=foobar -c 2>&1 \
 // RUN:   | FileCheck --match-full-lines %s
diff --git a/clang/test/Headers/cuda_with_openmp.cu b/clang/test/Headers/cuda_with_openmp.cu
index 49850cd1c1781..ec9706fa34cde 100644
--- a/clang/test/Headers/cuda_with_openmp.cu
+++ b/clang/test/Headers/cuda_with_openmp.cu
@@ -4,6 +4,6 @@
 
 // REQUIRES: nvptx-registered-target
 
-// RUN: %if x86-registered-target %{ %clang -target x86_64-unknown-linux -x cuda -fopenmp -c %s -o - --cuda-path=%S/../Driver/Inputs/CUDA/usr/local/cuda -nocudalib -isystem %S/Inputs/include -isystem %S/../../lib/Headers -fsyntax-only %}
-// RUN: %if systemz-registered-target %{ %clang -target s390x-ibm-zos -x cuda -fopenmp -c %s -o - --cuda-path=%S/../Driver/Inputs/CUDA/usr/local/cuda -nocudalib -isystem %S/Inputs/include -isystem %S/../../lib/Headers -fsyntax-only %}
+// RUN: %if x86-registered-target %{ %clang -target x86_64-unknown-linux -x cuda -fopenmp -c %s -o - --cuda-path=%S/../Driver/Inputs/CUDA_102/usr/local/cuda -nocudalib -isystem %S/Inputs/include -isystem %S/../../lib/Headers -fsyntax-only %}
+// RUN: %if systemz-registered-target %{ %clang -target s390x-ibm-zos -x cuda -fopenmp -c %s -o - --cuda-path=%S/../Driver/Inputs/CUDA_102/usr/local/cuda -nocudalib -isystem %S/Inputs/include -isystem %S/../../lib/Headers -fsyntax-only %}