[clang] 0060fff - [CUDA] Bump default GPU architecture to sm_35.
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 23 13:26:40 PDT 2021
Author: Artem Belevich
Date: 2021-08-23T13:24:45-07:00
New Revision: 0060fffc822261ff7350e34371c4456f363f866d
URL: https://github.com/llvm/llvm-project/commit/0060fffc822261ff7350e34371c4456f363f866d
DIFF: https://github.com/llvm/llvm-project/commit/0060fffc822261ff7350e34371c4456f363f866d.diff
LOG: [CUDA] Bump default GPU architecture to sm_35.
It's the oldest GPU architecture currently supported by all CUDA versions clang
can use.
Differential Revision: https://reviews.llvm.org/D108235
Added:
Modified:
clang/lib/Driver/Driver.cpp
clang/test/Driver/cuda-external-tools.cu
clang/test/Driver/cuda-march.cu
clang/test/Driver/cuda-options.cu
clang/test/Driver/cuda-ptxas-path.cu
clang/test/Driver/lto.cu
clang/test/Driver/openmp-offload.c
clang/test/Driver/opt-record.c
clang/test/Driver/thinlto.cu
Removed:
clang/test/Driver/cuda-options-freebsd.cu
################################################################################
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 5c323cb6ea23..cdd8ee4ca378 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2765,7 +2765,7 @@ class OffloadingActionBuilder final {
CudaActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
- DefaultCudaArch = CudaArch::SM_20;
+ DefaultCudaArch = CudaArch::SM_35;
}
StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
diff --git a/clang/test/Driver/cuda-external-tools.cu b/clang/test/Driver/cuda-external-tools.cu
index f73363508efe..4af5a3ce6016 100644
--- a/clang/test/Driver/cuda-external-tools.cu
+++ b/clang/test/Driver/cuda-external-tools.cu
@@ -7,96 +7,120 @@
// Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas O3.
// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT1 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT1 %s
// RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
// Generating relocatable device code
// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// With debugging enabled, ptxas should be run with no ptxas optimizations.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -g -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,DBG %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,DBG %s
// --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
// Regular compile without -O. This should result in us passing -O0 to ptxas.
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
// Regular compiles with -Os and -Oz. For lack of a better option, we map
// these to ptxas -O2.
// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
// Regular compile targeting sm_35.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
// Separate compilation targeting sm_35.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// 32-bit compile.
// RUN: %clang -### -target i386-linux-gnu -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s
// 32-bit compile when generating relocatable device code.
// RUN: %clang -### -target i386-linux-gnu -fgpu-rdc -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
// Check that we still pass -c when generating relocatable device code.
// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// Check -Xcuda-ptxas and -Xcuda-fatbinary
// RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,SM20,PTXAS-EXTRA,FATBINARY-EXTRA %s
+// RUN: | FileCheck -check-prefixes=CHECK,SM35,PTXAS-EXTRA,FATBINARY-EXTRA %s
// MacOS spot-checks
// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
// RUN: %clang -### -target i386-apple-macosx -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s
// Check relocatable device code generation on MacOS.
// RUN: %clang -### -target x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// RUN: %clang -### -target i386-apple-macosx -fgpu-rdc -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
// Check that CLANG forwards the -v flag to PTXAS.
-// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s
+// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \
+// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: | FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s
// Match clang job that produces PTX assembly.
// CHECK: "-cc1"
// ARCH64-SAME: "-triple" "nvptx64-nvidia-cuda"
// ARCH32-SAME: "-triple" "nvptx-nvidia-cuda"
-// SM20-SAME: "-target-cpu" "sm_20"
// SM35-SAME: "-target-cpu" "sm_35"
// RDC-SAME: "-fgpu-rdc"
// CHECK-NOT: "-fgpu-rdc"
-// SM20-SAME: "-o" "[[PTXFILE:[^"]*]]"
// SM35-SAME: "-o" "[[PTXFILE:[^"]*]]"
// Match the call to ptxas (which assembles PTX to SASS).
@@ -112,9 +136,7 @@
// OPT3-SAME: "-O3"
// OPT3-NOT: "-g"
// DBG-SAME: "-g" "--dont-merge-basicblocks" "--return-at-end"
-// SM20-SAME: "--gpu-name" "sm_20"
// SM35-SAME: "--gpu-name" "sm_35"
-// SM20-SAME: "--output-file" "[[CUBINFILE:[^"]*]]"
// SM35-SAME: "--output-file" "[[CUBINFILE:[^"]*]]"
// CHECK-SAME: "[[PTXFILE]]"
// PTXAS-EXTRA-SAME: "-foo1"
@@ -129,9 +151,7 @@
// ARCH64-SAME-DAG: "-64"
// ARCH32-SAME-DAG: "-32"
// CHECK-DAG: "--create" "[[FATBINARY:[^"]*]]"
-// SM20-SAME-DAG: "--image=profile=compute_20,file=[[PTXFILE]]"
// SM35-SAME-DAG: "--image=profile=compute_35,file=[[PTXFILE]]"
-// SM20-SAME-DAG: "--image=profile=sm_20,file=[[CUBINFILE]]"
// SM35-SAME-DAG: "--image=profile=sm_35,file=[[CUBINFILE]]"
// FATBINARY-EXTRA-SAME: "-bar1"
// FATBINARY-EXTRA-SAME: "-bar2"
diff --git a/clang/test/Driver/cuda-march.cu b/clang/test/Driver/cuda-march.cu
index 123b6617e0a5..a5ae7869743e 100644
--- a/clang/test/Driver/cuda-march.cu
+++ b/clang/test/Driver/cuda-march.cu
@@ -9,14 +9,14 @@
// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c \
// RUN: -march=haswell %s 2>&1 | FileCheck %s
// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c \
-// RUN: -march=haswell --cuda-gpu-arch=sm_20 %s 2>&1 | FileCheck %s
+// RUN: -march=haswell --cuda-gpu-arch=sm_35 %s 2>&1 | FileCheck %s
// CHECK: {{.*}}clang{{.*}}" "-cc1"
// CHECK-SAME: "-triple" "nvptx
-// CHECK-SAME: "-target-cpu" "sm_20"
+// CHECK-SAME: "-target-cpu" "sm_35"
// CHECK: ptxas
-// CHECK-SAME: "--gpu-name" "sm_20"
+// CHECK-SAME: "--gpu-name" "sm_35"
// CHECK: {{.*}}clang{{.*}}" "-cc1"
// CHECK-SAME: "-target-cpu" "haswell"
diff --git a/clang/test/Driver/cuda-options-freebsd.cu b/clang/test/Driver/cuda-options-freebsd.cu
deleted file mode 100644
index 18053304687a..000000000000
--- a/clang/test/Driver/cuda-options-freebsd.cu
+++ /dev/null
@@ -1,289 +0,0 @@
-// Tests CUDA compilation pipeline construction in Driver.
-// REQUIRES: clang-driver
-// REQUIRES: x86-registered-target
-// REQUIRES: nvptx-registered-target
-
-// Simple compilation case. Compile device-side to PTX assembly and make sure
-// we use it on the host side.
-// RUN: %clang -### -target x86_64-unknown-freebsd -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
-// RUN: -check-prefix NOLINK %s
-
-// Typical compilation + link case.
-// RUN: %clang -### -target x86_64-unknown-freebsd %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
-// RUN: -check-prefix LINK %s
-
-// Verify that --cuda-host-only disables device-side compilation, but doesn't
-// disable host-side compilation/linking.
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-host-only %s 2>&1 \
-// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
-// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
-
-// Verify that --cuda-device-only disables host-side compilation and linking.
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix NOHOST -check-prefix NOLINK %s
-
-// Check that the last of --cuda-compile-host-device, --cuda-host-only, and
-// --cuda-device-only wins.
-
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --cuda-host-only %s 2>&1 \
-// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
-// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
-
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-compile-host-device \
-// RUN: --cuda-host-only %s 2>&1 \
-// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
-// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
-
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-host-only \
-// RUN: --cuda-device-only %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix NOHOST -check-prefix NOLINK %s
-
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-compile-host-device \
-// RUN: --cuda-device-only %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix NOHOST -check-prefix NOLINK %s
-
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-host-only \
-// RUN: --cuda-compile-host-device %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
-// RUN: -check-prefix LINK %s
-
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --cuda-compile-host-device %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
-// RUN: -check-prefix LINK %s
-
-// Verify that --cuda-gpu-arch option passes the correct GPU architecture to
-// device compilation.
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-gpu-arch=sm_30 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix DEVICE-SM30 -check-prefix HOST \
-// RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
-
-// Verify that there is one device-side compilation per --cuda-gpu-arch args
-// and that all results are included on the host side.
-// RUN: %clang -### -target x86_64-unknown-freebsd \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
-// RUN: -check-prefixes DEVICE-SM30,DEVICE2-SM35 \
-// RUN: -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
-// RUN: -check-prefixes HOST,HOST-NOSAVE,NOLINK %s
-
-// Verify that device-side results are passed to the correct tool when
-// -save-temps is used.
-// RUN: %clang -### -target x86_64-unknown-freebsd -save-temps -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \
-// RUN: -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s
-
-// Verify that device-side results are passed to the correct tool when
-// -fno-integrated-as is used.
-// RUN: %clang -### -target x86_64-unknown-freebsd -fno-integrated-as -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
-// RUN: -check-prefix HOST -check-prefix HOST-NOSAVE \
-// RUN: -check-prefix HOST-AS -check-prefix NOLINK %s
-
-// Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
-// a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-gpu-arch=sm_35 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
-
-// b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-gpu-arch=sm_35 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
-
-// c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
-// we default to sm_20 -- same as if no --cuda-gpu-arch were passed.
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s
-
-// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30\
-// RUN: --no-cuda-gpu-arch=sm_50 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
-
-// e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
-
-// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-gpu-arch=all \
-// RUN: --cuda-gpu-arch=sm_35 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s
-
-// g) There's no --cuda-gpu-arch=all
-// RUN: %clang -### -target x86_64-unknown-freebsd --cuda-device-only \
-// RUN: --cuda-gpu-arch=all \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCHALLERROR %s
-
-
-// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
-// a) by default we're including PTX for all GPUs.
-// RUN: %clang -### -target x86_64-unknown-freebsd \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
-
-// b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
-// RUN: %clang -### -target x86_64-unknown-freebsd \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-include-ptx=all \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s
-
-// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
-// RUN: %clang -### -target x86_64-unknown-freebsd \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-include-ptx=sm_35 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s
-// RUN: %clang -### -target x86_64-unknown-freebsd \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-include-ptx=sm_30 \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s
-
-// d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
-// RUN: %clang -### -target x86_64-unknown-freebsd \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-include-ptx=all --cuda-include-ptx=all \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
-
-// e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
-// RUN: %clang -### -target x86_64-unknown-freebsd \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \
-// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
-
-
-// ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
-// NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
-// ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
-// NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30"
-// ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35"
-// NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35"
-// ARCHALLERROR: error: unsupported CUDA gpu architecture: all
-
-// Match device-side preprocessor and compiler phases with -save-temps.
-// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-freebsd"
-// DEVICE-SAVE-SAME: "-fcuda-is-device"
-// DEVICE-SAVE-SAME: "-x" "cuda"
-
-// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-freebsd"
-// DEVICE-SAVE-SAME: "-fcuda-is-device"
-// DEVICE-SAVE-SAME: "-x" "cuda-cpp-output"
-
-// Match the job that produces PTX assembly.
-// DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-freebsd"
-// DEVICE-SAME: "-fcuda-is-device"
-// DEVICE-SM30-SAME: "-target-cpu" "sm_30"
-// DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
-// DEVICE-NOSAVE-SAME: "-x" "cuda"
-// DEVICE-SAVE-SAME: "-x" "ir"
-
-// Match the call to ptxas (which assembles PTX to SASS).
-// DEVICE:ptxas
-// DEVICE-SM30-DAG: "--gpu-name" "sm_30"
-// DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
-// DEVICE-DAG: "[[PTXFILE]]"
-
-// Match another device-side compilation.
-// DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// DEVICE2-SAME: "-aux-triple" "x86_64-unknown-freebsd"
-// DEVICE2-SAME: "-fcuda-is-device"
-// DEVICE2-SM35-SAME: "-target-cpu" "sm_35"
-// DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
-// DEVICE2-SAME: "-x" "cuda"
-
-// Match another call to ptxas.
-// DEVICE2: ptxas
-// DEVICE2-SM35-DAG: "--gpu-name" "sm_35"
-// DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
-// DEVICE2-DAG: "[[PTXFILE2]]"
-
-// Match no device-side compilation.
-// NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// NODEVICE-NOT: "-fcuda-is-device"
-
-// INCLUDES-DEVICE:fatbinary
-// INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
-// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
-// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
-// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
-// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
-
-// Match host-side preprocessor job with -save-temps.
-// HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-freebsd"
-// HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
-// HOST-SAVE-NOT: "-fcuda-is-device"
-// HOST-SAVE-SAME: "-x" "cuda"
-
-// Match host-side compilation.
-// HOST: "-cc1" "-triple" "x86_64-unknown-freebsd"
-// HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
-// HOST-NOT: "-fcuda-is-device"
-// There is only one GPU binary after combining it with fatbinary!
-// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
-// INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
-// There is only one GPU binary after combining it with fatbinary.
-// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
-// HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
-// HOST-NOSAVE-SAME: "-x" "cuda"
-// HOST-SAVE-SAME: "-x" "cuda-cpp-output"
-
-// Match external assembler that uses compilation output.
-// HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"
-
-// Match no GPU code inclusion.
-// NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary"
-
-// Match no host compilation.
-// NOHOST-NOT: "-cc1" "-triple"
-// NOHOST-NOT: "-x" "cuda"
-
-// Match linker.
-// LINK: "{{.*}}{{ld|link}}{{(.exe)?}}"
-// LINK-SAME: "[[HOSTOUTPUT]]"
-
-// Match no linker.
-// NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"
-
-// FATBIN-COMMON:fatbinary
-// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
-// FATBIN-COMMON: "--image=profile=sm_30,file=
-// PTX-SM30: "--image=profile=compute_30,file=
-// NOPTX-SM30-NOT: "--image=profile=compute_30,file=
-// FATBIN-COMMON: "--image=profile=sm_35,file=
-// PTX-SM35: "--image=profile=compute_35,file=
-// NOPTX-SM35-NOT: "--image=profile=compute_35,file=
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index a1b66672572d..ad1a0bc8cbb0 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -94,47 +94,47 @@
// Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
// a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-gpu-arch=sm_35 \
+// RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-gpu-arch=sm_50 \
// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
+// RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,NOARCH-SM50 %s
// b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-gpu-arch=sm_35 \
+// RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-gpu-arch=sm_50 \
// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
+// RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,NOARCH-SM50 %s
// c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
-// we default to sm_20 -- same as if no --cuda-gpu-arch were passed.
+// we default to sm_35 -- same as if no --cuda-gpu-arch were passed.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
-// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
+// RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-gpu-arch=sm_50 --no-cuda-gpu-arch=sm_30 \
// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s
+// RUN: | FileCheck -check-prefixes NOARCH-SM30,ARCH-SM35,NOARCH-SM50 %s
// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30\
// RUN: --no-cuda-gpu-arch=sm_50 \
// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
+// RUN: | FileCheck -check-prefixes ARCH-SM30,ARCH-SM35,NOARCH-SM50 %s
// e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
-// RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
-// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-gpu-arch=sm_50 --no-cuda-gpu-arch=sm_30 \
+// RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \
// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
+// RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,ARCH-SM50 %s
// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
-// RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN: --no-cuda-gpu-arch=all \
-// RUN: --cuda-gpu-arch=sm_35 \
+// RUN: --cuda-gpu-arch=sm_50 \
// RUN: -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s
+// RUN: | FileCheck -check-prefixes NOARCH-SM30,NOARCH-SM35,ARCH-SM50 %s
// g) There's no --cuda-gpu-arch=all
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
@@ -190,12 +190,12 @@
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
-// ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
-// NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
// ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
// NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30"
// ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35"
// NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35"
+// ARCH-SM50: "-cc1"{{.*}}"-target-cpu" "sm_50"
+// NOARCH-SM50-NOT: "-cc1"{{.*}}"-target-cpu" "sm_50"
// ARCHALLERROR: error: unsupported CUDA gpu architecture: all
// Match device-side preprocessor and compiler phases with -save-temps.
diff --git a/clang/test/Driver/cuda-ptxas-path.cu b/clang/test/Driver/cuda-ptxas-path.cu
index 3ac5932b067a..2836babcb980 100644
--- a/clang/test/Driver/cuda-ptxas-path.cu
+++ b/clang/test/Driver/cuda-ptxas-path.cu
@@ -9,4 +9,4 @@
// CHECK-NOT: "ptxas"
// CHECK: "/some/path/to/ptxas"
-// CHECK-SAME: "--gpu-name" "sm_20"
+// CHECK-SAME: "--gpu-name" "sm_35"
diff --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu
index 131ed83c1e32..1fd48fd4405c 100644
--- a/clang/test/Driver/lto.cu
+++ b/clang/test/Driver/lto.cu
@@ -16,13 +16,13 @@
// CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}lto.cu", cuda, (host-cuda)
// CHECK-COMPILELINK-ACTIONS: 1: preprocessor, {0}, cuda-cpp-output
// CHECK-COMPILELINK-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
-// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}lto.cu", cuda, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_20)" {7}, object
-// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_20)" {6}, assembler
+// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}lto.cu", cuda, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_{{.*}})" {7}, object
+// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_{{.*}})" {6}, assembler
// CHECK-COMPILELINK-ACTIONS: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
// CHECK-COMPILELINK-ACTIONS: 11: offload, "host-cuda {{.*}}" {2}, "device-cuda{{.*}}" {10}, ir
// CHECK-COMPILELINK-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c
index 6d4c039fdff6..699a31276d6f 100644
--- a/clang/test/Driver/openmp-offload.c
+++ b/clang/test/Driver/openmp-offload.c
@@ -215,13 +215,13 @@
// CHK-PHASES-WITH-CUDA: 0: input, "[[INPUT:.+\.c]]", cuda, (host-cuda-openmp)
// CHK-PHASES-WITH-CUDA: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda-openmp)
// CHK-PHASES-WITH-CUDA: 2: compiler, {1}, ir, (host-cuda-openmp)
-// CHK-PHASES-WITH-CUDA: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_20)
-// CHK-PHASES-WITH-CUDA: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
-// CHK-PHASES-WITH-CUDA: 5: compiler, {4}, ir, (device-cuda, sm_20)
-// CHK-PHASES-WITH-CUDA: 6: backend, {5}, assembler, (device-cuda, sm_20)
-// CHK-PHASES-WITH-CUDA: 7: assembler, {6}, object, (device-cuda, sm_20)
-// CHK-PHASES-WITH-CUDA: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object
-// CHK-PHASES-WITH-CUDA: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler
+// CHK-PHASES-WITH-CUDA: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_{{.*}})
+// CHK-PHASES-WITH-CUDA: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_{{.*}})
+// CHK-PHASES-WITH-CUDA: 5: compiler, {4}, ir, (device-cuda, sm_{{.*}})
+// CHK-PHASES-WITH-CUDA: 6: backend, {5}, assembler, (device-cuda, sm_{{.*}})
+// CHK-PHASES-WITH-CUDA: 7: assembler, {6}, object, (device-cuda, sm_{{.*}})
+// CHK-PHASES-WITH-CUDA: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_{{.*}})" {7}, object
+// CHK-PHASES-WITH-CUDA: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_{{.*}})" {6}, assembler
// CHK-PHASES-WITH-CUDA: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
// CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir
// CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp)
diff --git a/clang/test/Driver/opt-record.c b/clang/test/Driver/opt-record.c
index 32aa660ef231..02840f49a86f 100644
--- a/clang/test/Driver/opt-record.c
+++ b/clang/test/Driver/opt-record.c
@@ -24,7 +24,7 @@
// CHECK-NO-O: "-cc1"
// CHECK-NO-O-DAG: "-opt-record-file" "opt-record.opt.yaml"
-// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-cuda-{{nvptx64|nvptx}}-nvidia-cuda-sm_20.opt.yaml"
+// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-cuda-{{nvptx64|nvptx}}-nvidia-cuda-sm_{{.*}}.opt.yaml"
// CHECK-EQ: "-cc1"
// CHECK-EQ: "-opt-record-file" "BAR.txt"
diff --git a/clang/test/Driver/thinlto.cu b/clang/test/Driver/thinlto.cu
index bd13e2e3b731..1f82115d76b4 100644
--- a/clang/test/Driver/thinlto.cu
+++ b/clang/test/Driver/thinlto.cu
@@ -16,13 +16,13 @@
// CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}thinlto.cu", cuda, (host-cuda)
// CHECK-COMPILELINK-ACTIONS: 1: preprocessor, {0}, cuda-cpp-output
// CHECK-COMPILELINK-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
-// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}thinlto.cu", cuda, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_20)
-// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_20)" {7}, object
-// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_20)" {6}, assembler
+// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}thinlto.cu", cuda, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_{{.*}})
+// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_{{.*}})" {7}, object
+// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx{{.*}}-nvidia-cuda:sm_{{.*}})" {6}, assembler
// CHECK-COMPILELINK-ACTIONS: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
// CHECK-COMPILELINK-ACTIONS: 11: offload, "host-cuda {{.*}}" {2}, "device-cuda{{.*}}" {10}, ir
// CHECK-COMPILELINK-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
More information about the cfe-commits mailing list