[clang] [CUDA] Move CUDA to new driver by default (PR #122312)
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 9 09:09:11 PST 2025
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/122312
>From 533572c8e73c7330fe91d95428fd0189471073d8 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 9 Jan 2025 10:40:43 -0600
Subject: [PATCH] [CUDA] Move CUDA to new driver by default
Summary:
This patch makes --offload-new-driver the default for CUDA.
Most of the work was updating a large number of tests. I converted them
to the new driver's output where possible, but tests that directly check
the old format now pass --no-offload-new-driver to keep using the old driver.
https://discourse.llvm.org/t/rfc-use-the-new-offloding-driver-for-cuda-and-hip-compilation-by-default/77468/18
---
clang/lib/Driver/Driver.cpp | 6 ++-
clang/lib/Driver/ToolChains/Clang.cpp | 9 +++--
clang/lib/Driver/ToolChains/Cuda.cpp | 2 +-
clang/test/Driver/cuda-arch-translation.cu | 26 ++++++-------
clang/test/Driver/cuda-bindings.cu | 43 ++++++----------------
clang/test/Driver/cuda-options.cu | 30 +++++++--------
clang/test/Driver/cuda-output-asm.cu | 4 --
7 files changed, 51 insertions(+), 69 deletions(-)
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 57fa7c1110a68e..64ea3c4e6e08d3 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4334,7 +4334,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
Args.hasFlag(options::OPT_foffload_via_llvm,
options::OPT_fno_offload_via_llvm, false) ||
Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false);
+ options::OPT_no_offload_new_driver,
+ C.isOffloadingHostKind(Action::OFK_Cuda));
// Builder to be used to build offloading actions.
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -5084,7 +5085,8 @@ Action *Driver::ConstructPhaseAction(
offloadDeviceOnly() ||
(TargetDeviceOffloadKind == Action::OFK_HIP &&
!Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false)))
+ options::OPT_no_offload_new_driver,
+ C.isOffloadingHostKind(Action::OFK_Cuda))))
? types::TY_LLVM_IR
: types::TY_LLVM_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a0002371da2f1b..b7d40444a7ee6b 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5064,7 +5064,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
JA.isHostOffloading(Action::OFK_SYCL) ||
(JA.isHostOffloading(C.getActiveOffloadKinds()) &&
Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false));
+ options::OPT_no_offload_new_driver,
+ C.isOffloadingHostKind(Action::OFK_Cuda)));
bool IsRDCMode =
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
@@ -5418,7 +5419,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (IsUsingLTO) {
if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
!Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false) &&
+ options::OPT_no_offload_new_driver,
+ C.isOffloadingHostKind(Action::OFK_Cuda)) &&
!Triple.isAMDGPU()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_foffload_lto,
@@ -6895,7 +6897,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_offload_via_llvm, false)) {
CmdArgs.append({"--offload-new-driver", "-foffload-via-llvm"});
} else if (Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false)) {
+ options::OPT_no_offload_new_driver,
+ C.isOffloadingHostKind(Action::OFK_Cuda))) {
CmdArgs.push_back("--offload-new-driver");
}
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 214f1e5d83478f..8967115bcc73d9 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -506,7 +506,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
// The new driver does not include PTX by default to avoid overhead.
bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false);
+ options::OPT_no_offload_new_driver, true);
for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ,
options::OPT_no_cuda_include_ptx_EQ)) {
A->claim();
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index e96191cc9d4183..a0ae16452692bf 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -68,19 +68,19 @@
// HIP: clang-offload-bundler
-// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
-// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
-// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30
-// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32
-// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35
-// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37
-// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
-// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
-// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
-// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
-// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
-// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
-// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
+// SM20:--image=profile=sm_20{{.*}}
+// SM21:--image=profile=sm_21{{.*}}
+// SM30:--image=profile=sm_30{{.*}}
+// SM32:--image=profile=sm_32{{.*}}
+// SM35:--image=profile=sm_35{{.*}}
+// SM37:--image=profile=sm_37{{.*}}
+// SM50:--image=profile=sm_50{{.*}}
+// SM52:--image=profile=sm_52{{.*}}
+// SM53:--image=profile=sm_53{{.*}}
+// SM60:--image=profile=sm_60{{.*}}
+// SM61:--image=profile=sm_61{{.*}}
+// SM62:--image=profile=sm_62{{.*}}
+// SM70:--image=profile=sm_70{{.*}}
// GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600
// GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601
// GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602
diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu
index 8ee1884936c069..5b6f944621439e 100644
--- a/clang/test/Driver/cuda-bindings.cu
+++ b/clang/test/Driver/cuda-bindings.cu
@@ -23,14 +23,14 @@
// BIN-NOT: cuda-bindings-device-cuda-nvptx64
// BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
// BIN-NOT: cuda-bindings-device-cuda-nvptx64
-// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
//
// Test single gpu architecture up to the assemble phase.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
// RUN: | FileCheck -check-prefix=ASM %s
-// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
+// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[PTX:.+]].s"
// ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
//
@@ -61,40 +61,21 @@
// BIN2-NOT: cuda-bindings-device-cuda-nvptx64
// BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
// BIN2-NOT: cuda-bindings-device-cuda-nvptx64
-// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
-// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
+// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
+// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out"
// .. same, but with -fsyntax-only
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
-// RUN: | FileCheck -check-prefix=SYN %s
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
-// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
-// RUN: | FileCheck -check-prefix=SYN %s
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
-// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
-// RUN: | FileCheck -check-prefix=SYN %s
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
-// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
-// RUN: | FileCheck -check-prefix=SYN %s
-// SYN-NOT: inputs:
-// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
-// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
-// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
-// SYN-NOT: inputs
-
-// .. and with --offload-new-driver
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
-// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 --offload-new-driver %s 2>&1 \
// RUN: | FileCheck -check-prefix=NDSYN %s
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
-// RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
+// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
// RUN: | FileCheck -check-prefix=NDSYN %s
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
-// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --offload-new-driver 2>&1 \
+// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
// RUN: | FileCheck -check-prefix=NDSYN %s
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
-// RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
+// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
// RUN: | FileCheck -check-prefix=NDSYN %s
// NDSYN-NOT: inputs:
// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
@@ -109,8 +90,8 @@
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
// RUN: | FileCheck -check-prefix=ASM2 %s
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+]].s"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+]].s"
// ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
//
@@ -125,7 +106,7 @@
// RUN: | FileCheck -check-prefix=HBIN %s
// HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
// HBIN-NOT: cuda-bindings-device-cuda-nvptx64
-// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
//
// Test one or more gpu architecture up to the assemble phase in host-only
@@ -163,7 +144,7 @@
// Test two gpu architectures with complete compilation in device-only
// compilation mode.
//
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
// RUN: | FileCheck -check-prefix=DBIN2 %s
// DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
@@ -177,7 +158,7 @@
// Test two gpu architectures up to the assemble phase in device-only
// compilation mode.
//
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
// RUN: | FileCheck -check-prefix=DASM2 %s
// DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index 67facf77f6c68a..db6536ca9e03b4 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -2,13 +2,13 @@
// Simple compilation case. Compile device-side to PTX assembly and make sure
// we use it on the host side.
-// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN: -check-prefix NOLINK %s
// Typical compilation + link case.
-// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN: -check-prefix LINK %s
@@ -33,7 +33,7 @@
// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
-// RUN: --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
+// RUN: --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
@@ -47,13 +47,13 @@
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix NOHOST -check-prefix NOLINK %s
-// RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
+// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-host-only \
// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN: -check-prefix LINK %s
-// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
+// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-device-only \
// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
@@ -61,14 +61,14 @@
// Verify that --cuda-gpu-arch option passes the correct GPU architecture to
// device compilation.
-// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
+// RUN: %clang -### -nogpulib -nogpuinc --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix DEVICE-SM52 -check-prefix HOST \
// RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
// Verify that there is one device-side compilation per --cuda-gpu-arch args
// and that all results are included on the host side.
-// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu \
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
// RUN: -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
@@ -128,9 +128,9 @@
// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
-// RUN: --no-cuda-gpu-arch=all \
+// RUN: --no-cuda-version-check --no-cuda-gpu-arch=all \
// RUN: --cuda-gpu-arch=sm_70 \
-// RUN: -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN: -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
// g) There's no --cuda-gpu-arch=all
@@ -141,9 +141,9 @@
// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
-// a) by default we're including PTX for all GPUs.
+// a) by default we're not including PTX for any GPU; --cuda-include-ptx=all opts in.
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
-// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
+// RUN: --cuda-include-ptx=all --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN: -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s
@@ -157,12 +157,12 @@
// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
-// RUN: --no-cuda-include-ptx=sm_60 \
+// RUN: --no-cuda-include-ptx=sm_60 --cuda-include-ptx=sm_52 \
// RUN: -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,PTX-SM52 %s
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
-// RUN: --no-cuda-include-ptx=sm_52 \
+// RUN: --no-cuda-include-ptx=sm_52 --cuda-include-ptx=sm_60 \
// RUN: -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,NOPTX-SM52 %s
@@ -183,8 +183,8 @@
// Verify -flto=thin -fwhole-program-vtables handling. This should result in
// both options being passed to the host compilation, with neither passed to
// the device compilation.
-// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
-// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
+// RUN: %clang -### --cuda-include-ptx=sm_60 --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
+// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,NOLINK,THINLTOWPD %s
// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
// ARCH-SM52: "-cc1"{{.*}}"-target-cpu" "sm_52"
diff --git a/clang/test/Driver/cuda-output-asm.cu b/clang/test/Driver/cuda-output-asm.cu
index 6b944d18917247..9d5b86bcbc1b46 100644
--- a/clang/test/Driver/cuda-output-asm.cu
+++ b/clang/test/Driver/cuda-output-asm.cu
@@ -17,13 +17,9 @@
// SM30-DAG: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// SM30-same: "-target-cpu" "sm_30"
-// RUN: not %clang -### -S --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
// RUN: not %clang -### -S --target=x86_64-linux-gnu --cuda-device-only \
// RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \
// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
-// RUN: not %clang -### -emit-llvm -c --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
// MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files
// Make sure we do not get duplicate diagnostics.
// MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files
More information about the cfe-commits mailing list