[clang] [Offload] Move HIP and CUDA to new driver by default (PR #84420)

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 8 07:07:49 PST 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/84420

>From 3e2846b721ba17d44a05d7d97b377fa3a43c8cef Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 7 Mar 2024 15:48:00 -0600
Subject: [PATCH] [Offload] Move HIP and CUDA to new driver by default

Summary:
This patch updates the `--offload-new-driver` flag to be default for all
current offloading languages. This mostly just required updating a lot
of tests to use the old format. I tried to update them where possible,
but some were directly checking the old format.

This is not intended to be landed immediately, but to allow for greater
testing. One potential issue I've discovered is the lack of SPIR-V
support or handling for `--offload`.
---
 clang/lib/Driver/Driver.cpp                   |  6 ++---
 clang/lib/Driver/ToolChains/Clang.cpp         | 10 ++++---
 clang/test/Driver/cl-offload.cu               |  5 ++--
 clang/test/Driver/cuda-arch-translation.cu    | 26 +++++++++----------
 clang/test/Driver/cuda-bindings.cu            | 24 ++++++++---------
 clang/test/Driver/cuda-options.cu             | 23 ++++++++--------
 clang/test/Driver/cuda-output-asm.cu          |  4 ---
 clang/test/Driver/cuda-version-check.cu       |  6 ++---
 clang/test/Driver/hip-gz-options.hip          |  1 -
 clang/test/Driver/hip-invalid-target-id.hip   |  4 +--
 clang/test/Driver/hip-macros.hip              |  3 ---
 clang/test/Driver/hip-offload-arch.hip        |  2 +-
 clang/test/Driver/hip-options.hip             |  8 ++----
 clang/test/Driver/hip-sanitize-options.hip    |  2 +-
 clang/test/Driver/hip-save-temps.hip          | 12 ++++-----
 .../test/Driver/hip-toolchain-device-only.hip |  4 ---
 clang/test/Driver/hip-toolchain-mllvm.hip     |  2 --
 clang/test/Driver/invalid-offload-options.cpp |  2 +-
 clang/test/Preprocessor/cuda-preprocess.cu    |  8 +++---
 clang/unittests/Tooling/ToolingTest.cpp       |  6 ++---
 20 files changed, 71 insertions(+), 87 deletions(-)

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index fce43430a91374..e85a3e675408e4 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4115,9 +4115,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
   handleArguments(C, Args, Inputs, Actions);
 
   bool UseNewOffloadingDriver =
-      C.isOffloadingHostKind(Action::OFK_OpenMP) ||
+      C.getActiveOffloadKinds() != Action::OFK_None &&
       Args.hasFlag(options::OPT_offload_new_driver,
-                   options::OPT_no_offload_new_driver, false);
+                   options::OPT_no_offload_new_driver, true);
 
   // Builder to be used to build offloading actions.
   std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -4802,7 +4802,7 @@ Action *Driver::ConstructPhaseAction(
                    offloadDeviceOnly() ||
                    (TargetDeviceOffloadKind == Action::OFK_HIP &&
                     !Args.hasFlag(options::OPT_offload_new_driver,
-                                  options::OPT_no_offload_new_driver, false)))
+                                  options::OPT_no_offload_new_driver, true)))
               ? types::TY_LLVM_IR
               : types::TY_LLVM_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fa17f6295d6ea7..9605fb28e5fe34 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4687,8 +4687,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool IsHostOffloadingAction =
       JA.isHostOffloading(Action::OFK_OpenMP) ||
       (JA.isHostOffloading(C.getActiveOffloadKinds()) &&
+       C.getActiveOffloadKinds() != Action::OFK_None &&
        Args.hasFlag(options::OPT_offload_new_driver,
-                    options::OPT_no_offload_new_driver, false));
+                    options::OPT_no_offload_new_driver, true));
 
   bool IsRDCMode =
       Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
@@ -4997,7 +4998,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     if (IsUsingLTO) {
       if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
           !Args.hasFlag(options::OPT_offload_new_driver,
-                        options::OPT_no_offload_new_driver, false) &&
+                        options::OPT_no_offload_new_driver, true) &&
           !Triple.isAMDGPU()) {
         D.Diag(diag::err_drv_unsupported_opt_for_target)
             << Args.getLastArg(options::OPT_foffload_lto,
@@ -6521,8 +6522,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   }
 
   // Forward the new driver to change offloading code generation.
-  if (Args.hasFlag(options::OPT_offload_new_driver,
-                   options::OPT_no_offload_new_driver, false))
+  if (C.getActiveOffloadKinds() != Action::OFK_None &&
+      Args.hasFlag(options::OPT_offload_new_driver,
+                   options::OPT_no_offload_new_driver, true))
     CmdArgs.push_back("--offload-new-driver");
 
   SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);
diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
index b05bf3b97b7eb7..8f1200f1733597 100644
--- a/clang/test/Driver/cl-offload.cu
+++ b/clang/test/Driver/cl-offload.cu
@@ -18,11 +18,10 @@
 // CUDA-SAME: "-Weverything"
 // CUDA: link
 
-// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
-// HIP-SAME: "-Weverything"
 // HIP: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-pc-windows-msvc"
 // HIP-SAME: "-Weverything"
-// HIP: {{lld.* "-flavor" "gnu" "-m" "elf64_amdgpu"}}
+// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
+// HIP-SAME: "-Weverything"
 // HIP: {{link.* "amdhip64.lib"}}
 
 // CMake uses this option when finding packages for HIP, so
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index f37964d87c66a1..757defec959c47 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -69,19 +69,19 @@
 
 // HIP: clang-offload-bundler
 
-// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
-// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
-// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30
-// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32
-// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35
-// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37
-// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
-// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
-// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
-// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
-// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
-// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
-// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
+// SM20:--image=profile=sm_20{{.*}}
+// SM21:--image=profile=sm_21{{.*}}
+// SM30:--image=profile=sm_30{{.*}}
+// SM32:--image=profile=sm_32{{.*}}
+// SM35:--image=profile=sm_35{{.*}}
+// SM37:--image=profile=sm_37{{.*}}
+// SM50:--image=profile=sm_50{{.*}}
+// SM52:--image=profile=sm_52{{.*}}
+// SM53:--image=profile=sm_53{{.*}}
+// SM60:--image=profile=sm_60{{.*}}
+// SM61:--image=profile=sm_61{{.*}}
+// SM62:--image=profile=sm_62{{.*}}
+// SM70:--image=profile=sm_70{{.*}}
 // GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600
 // GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601
 // GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602
diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu
index e130e08f20152a..3b95aa4bd50541 100644
--- a/clang/test/Driver/cuda-bindings.cu
+++ b/clang/test/Driver/cuda-bindings.cu
@@ -26,14 +26,14 @@
 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
-// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
 
 //
 // Test single gpu architecture up to the assemble phase.
 //
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefix=ASM %s
-// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
+// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[BINDINGS:.+.s]]"
 // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 
 //
@@ -64,8 +64,8 @@
 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
-// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
-// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
+// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
+// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out"
 
 // .. same, but with -fsyntax-only
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
@@ -81,9 +81,9 @@
 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
 // RUN: | FileCheck -check-prefix=SYN %s
 // SYN-NOT: inputs:
-// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
-// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+//      SYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+// SYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 // SYN-NOT: inputs
 
 // .. and with --offload-new-driver
@@ -100,7 +100,7 @@
 // RUN:        --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
 // RUN: | FileCheck -check-prefix=NDSYN %s
 // NDSYN-NOT: inputs:
-// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+//      NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NOT: inputs:
@@ -112,8 +112,8 @@
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefix=ASM2 %s
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+.s]]"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+.s]]"
 // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 
 //
@@ -128,7 +128,7 @@
 // RUN: | FileCheck -check-prefix=HBIN %s
 // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // HBIN-NOT: cuda-bindings-device-cuda-nvptx64
-// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
 
 //
 // Test one or more gpu architecture up to the assemble phase in host-only
@@ -166,7 +166,7 @@
 // Test two gpu architectures with complete compilation in device-only
 // compilation mode.
 //
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
 // RUN: | FileCheck -check-prefix=DBIN2 %s
 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
@@ -180,7 +180,7 @@
 // Test two gpu architectures up to the assemble phase in device-only
 // compilation mode.
 //
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
 // RUN: | FileCheck -check-prefix=DASM2 %s
 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index 8999a6618fe1fa..3aef694b56f496 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -4,13 +4,13 @@
 
 // Simple compilation case. Compile device-side to PTX assembly and make sure
 // we use it on the host side.
-// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix NOLINK %s
 
 // Typical compilation + link case.
-// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
@@ -35,7 +35,7 @@
 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
-// RUN:    --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
+// RUN:    --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
 
@@ -50,27 +50,27 @@
 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
-// RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN:   --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
-// RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN:   --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
 
 // Verify that --cuda-gpu-arch option passes the correct GPU architecture to
 // device compilation.
-// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix DEVICE-SM52 -check-prefix HOST \
 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
 
 // Verify that there is one device-side compilation per --cuda-gpu-arch args
 // and that all results are included on the host side.
-// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
 // RUN:             -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
@@ -130,9 +130,9 @@
 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
-// RUN:   --no-cuda-gpu-arch=all \
+// RUN:   --no-cuda-version-check --no-cuda-gpu-arch=all \
 // RUN:   --cuda-gpu-arch=sm_70 \
-// RUN:   -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN:   -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
 
 // g) There's no --cuda-gpu-arch=all
@@ -143,7 +143,7 @@
 
 
 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
-// a) by default we're including PTX for all GPUs.
+// a) by default we're not including PTX for all GPUs.
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 // RUN:   -c %s 2>&1 \
@@ -185,7 +185,8 @@
 // Verify -flto=thin -fwhole-program-vtables handling. This should result in
 // both options being passed to the host compilation, with neither passed to
 // the device compilation.
-// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
+// RUN:   -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
 // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 
diff --git a/clang/test/Driver/cuda-output-asm.cu b/clang/test/Driver/cuda-output-asm.cu
index ed1aeadc243a5e..be08f6de47bf9d 100644
--- a/clang/test/Driver/cuda-output-asm.cu
+++ b/clang/test/Driver/cuda-output-asm.cu
@@ -20,13 +20,9 @@
 // SM30-DAG: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // SM30-same: "-target-cpu" "sm_30"
 
-// RUN: not %clang -### -S --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
 // RUN: not %clang -### -S --target=x86_64-linux-gnu --cuda-device-only \
 // RUN:   --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \
 // RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
-// RUN: not %clang -### -emit-llvm -c --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
 // MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files
 // Make sure we do not get duplicate diagnostics.
 // MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files
diff --git a/clang/test/Driver/cuda-version-check.cu b/clang/test/Driver/cuda-version-check.cu
index dc313e928c7d27..0089f45d459ce9 100644
--- a/clang/test/Driver/cuda-version-check.cu
+++ b/clang/test/Driver/cuda-version-check.cu
@@ -65,12 +65,12 @@
 // ERR_SM20: error: GPU arch sm_20 {{.*}}
 // ERR_SM20-NOT: error: GPU arch sm_20
 
-// ERR_SM60: error: GPU arch sm_60 {{.*}}
-// ERR_SM60-NOT: error: GPU arch sm_60
-
 // ERR_SM61: error: GPU arch sm_61 {{.*}}
 // ERR_SM61-NOT: error: GPU arch sm_61
 
+// ERR_SM60: error: GPU arch sm_60 {{.*}}
+// ERR_SM60-NOT: error: GPU arch sm_60
+
 // UNKNOWN_VERSION: CUDA version is newer than the latest{{.*}} supported version
 // UNKNOWN_VERSION_CXX-NOT: unknown CUDA version
 
diff --git a/clang/test/Driver/hip-gz-options.hip b/clang/test/Driver/hip-gz-options.hip
index 8fba391c08b064..2d83a7266870a1 100644
--- a/clang/test/Driver/hip-gz-options.hip
+++ b/clang/test/Driver/hip-gz-options.hip
@@ -11,4 +11,3 @@
 // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
 // CHECK-DAG: {{".*lld" .* "--compress-debug-sections=zlib"}}
 // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
-// CHECK: "--compress-debug-sections=zlib"
diff --git a/clang/test/Driver/hip-invalid-target-id.hip b/clang/test/Driver/hip-invalid-target-id.hip
index c74037d9a4d6da..f7c581ae62fbff 100644
--- a/clang/test/Driver/hip-invalid-target-id.hip
+++ b/clang/test/Driver/hip-invalid-target-id.hip
@@ -7,7 +7,7 @@
 // RUN:   --rocm-path=%S/Inputs/rocm \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
 
-// NOPLUS: error: invalid target ID 'gfx908xnack'
+// NOPLUS: error: unsupported HIP gpu architecture: gfx908xnack
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip --offload-arch=gfx900 \
@@ -58,7 +58,7 @@
 // RUN:   --rocm-path=%S/Inputs/rocm \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=NOCOLON %s
 
-// NOCOLON: error: invalid target ID 'gfx900+xnack'
+// NOCOLON: error: unsupported HIP gpu architecture: gfx900+xnack
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip --offload-arch=gfx908 \
diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip
index 3b3afba0b18ca3..36e0f71bd6eff6 100644
--- a/clang/test/Driver/hip-macros.hip
+++ b/clang/test/Driver/hip-macros.hip
@@ -73,8 +73,6 @@
 // RUN: %clang -E -dM --offload-arch=gfx940 --cuda-device-only -nogpuinc -nogpulib \
 // RUN:   %s 2>&1 | FileCheck --check-prefixes=NOPTS %s
 // PTS-DAG: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__ 1
-// PTS-DAG: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__ 1
-// PTS-DAG: #define HIP_API_PER_THREAD_DEFAULT_STREAM 1
 // PTS-DAG: #define HIP_API_PER_THREAD_DEFAULT_STREAM 1
 // NOPTS-NOT: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__
 // NOPTS-NOT: #define HIP_API_PER_THREAD_DEFAULT_STREAM
@@ -85,4 +83,3 @@
 // RUN:   %s 2>&1 | FileCheck --check-prefix=APPROX %s
 // NOAPPROX-NOT: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__
 // APPROX: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__ 1
-// APPROX: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__ 1
diff --git a/clang/test/Driver/hip-offload-arch.hip b/clang/test/Driver/hip-offload-arch.hip
index 12b28bd2c89e38..77e0cb2dc6f5bb 100644
--- a/clang/test/Driver/hip-offload-arch.hip
+++ b/clang/test/Driver/hip-offload-arch.hip
@@ -6,5 +6,5 @@
 // RUN:   -nogpuinc -nogpulib \
 // RUN:   %s 2>&1 | FileCheck %s
 
-// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1030"}}
 // CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1031"}}
+// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1030"}}
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index 2ba9032f16946b..326517f0e1351a 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -86,10 +86,6 @@
 // RUN:   --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
 
-// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
-
 // Ensure we don't error about -fwhole-program-vtables for the non-device offload compile.
 // HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
@@ -125,7 +121,7 @@
 
 // Check -Xoffload-linker option is passed to lld.
 
-// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --no-offload-new-driver \
 // RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -Xoffload-linker --build-id=md5 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=OFL-LINK %s
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
@@ -244,4 +240,4 @@
 // RUN:   2>&1 | FileCheck -check-prefix=NO-WARN-ATOMIC %s
 // NO-WARN-ATOMIC: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-Werror=atomic-alignment" {{.*}} "-Wno-error=atomic-alignment"
 // NO-WARN-ATOMIC-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-Werror=atomic-alignment"
-// NO-WARN-ATOMIC-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-Wno-error=atomic-alignment"
\ No newline at end of file
+// NO-WARN-ATOMIC-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-Wno-error=atomic-alignment"
diff --git a/clang/test/Driver/hip-sanitize-options.hip b/clang/test/Driver/hip-sanitize-options.hip
index 184889d357a47f..30ba3ea0cf8c83 100644
--- a/clang/test/Driver/hip-sanitize-options.hip
+++ b/clang/test/Driver/hip-sanitize-options.hip
@@ -58,8 +58,8 @@
 // NORDC-NOT: {{"[^"]*lld(\.exe){0,1}".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}}
 // NORDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 
-// RDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 // RDC: {{"[^"]*clang[^"]*".* "-emit-llvm-bc".* "-fcuda-is-device".* "-mlink-bitcode-file" ".*asanrtl.bc".* "-mlink-builtin-bitcode" ".*hip.bc".* "-fsanitize=address".*}} "-o" "[[OUT:[^"]*.bc]]"
+// RDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 // RDC-NOT: {{"[^"]*lld(\.exe){0,1}".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}}
 
 // FAIL: AMDGPU address sanitizer runtime library (asanrtl) is not found. Please install ROCm device library which supports address sanitizer
diff --git a/clang/test/Driver/hip-save-temps.hip b/clang/test/Driver/hip-save-temps.hip
index d891b0d9bf627f..0527c049ffa29f 100644
--- a/clang/test/Driver/hip-save-temps.hip
+++ b/clang/test/Driver/hip-save-temps.hip
@@ -3,32 +3,32 @@
 
 // -fno-gpu-rdc without -o with -c
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc --offload-arch=gfx900 -c %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc --offload-arch=gfx900 -c %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC %s
 
 // -fno-gpu-rdc without -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC,NOUT %s
 
 // -fno-gpu-rdc with -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -o executable --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -o executable --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC,WOUT %s
 
 // -fgpu-rdc without -o with -c
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -fgpu-rdc --offload-arch=gfx900 -c %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -fgpu-rdc --offload-arch=gfx900 -c %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,RDC,RDCC %s
 
 // -fgpu-rdc without -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,RDC,RDCL,NOUT %s
 
 // -fgpu-rdc with -o
 // UN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// UN:   -nogpuinc -o executable -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
+// UN:   --offload-new-driver -nogpuinc -o executable -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
 // UN:   FileCheck -check-prefixes=CHECK,RDC,RDCL,WOUT %s
 
 // -fgpu-rdc host object path
diff --git a/clang/test/Driver/hip-toolchain-device-only.hip b/clang/test/Driver/hip-toolchain-device-only.hip
index c6ff67bde738c0..b6bc6ba7bd64ec 100644
--- a/clang/test/Driver/hip-toolchain-device-only.hip
+++ b/clang/test/Driver/hip-toolchain-device-only.hip
@@ -23,7 +23,3 @@
 
 // CHECK: [[LLD]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]
-
-// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
-// CHECK-SAME: "-targets={{.*}},hip{{.*}}-amdgcn-amd-amdhsa--gfx803,hip{{.*}}-amdgcn-amd-amdhsa--gfx900"
-// CHECK-SAME: "-input={{.*}}" "-input=[[IMG_DEV_A_803]]" "-input=[[IMG_DEV_A_900]]" "-output=[[BUNDLE_A:.*hipfb]]"
diff --git a/clang/test/Driver/hip-toolchain-mllvm.hip b/clang/test/Driver/hip-toolchain-mllvm.hip
index 110d1c9b7fd33e..a13e8804ea2340 100644
--- a/clang/test/Driver/hip-toolchain-mllvm.hip
+++ b/clang/test/Driver/hip-toolchain-mllvm.hip
@@ -33,13 +33,11 @@
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: {{.*}} "-target-cpu" "gfx803"
 // CHECK-SAME: {{.*}} "-mllvm" "-unroll-count=10" {{.*}}
-// CHECK: [[LLD:".*lld.*"]] {{.*}}"-m" "elf64_amdgpu"{{.*}} "-plugin-opt=-unroll-count=10"{{.*}} "-plugin-opt=-inline-threshold=100"
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: {{.*}} "-target-cpu" "gfx900"
 // CHECK-SAME: {{.*}} "-mllvm" "-unroll-count=10" {{.*}}
-// CHECK: [[LLD:".*lld.*"]] {{.*}} "-plugin-opt=-unroll-count=10"{{.*}} "-plugin-opt=-inline-threshold=100"
 
 // NEG-NOT: {{".*opt"}}
 // NEG-NOT: {{".*llc"}}
diff --git a/clang/test/Driver/invalid-offload-options.cpp b/clang/test/Driver/invalid-offload-options.cpp
index f9bb5cf9ab5636..1f6e79a3db9b56 100644
--- a/clang/test/Driver/invalid-offload-options.cpp
+++ b/clang/test/Driver/invalid-offload-options.cpp
@@ -27,4 +27,4 @@
 // RUN:   --offload=amdgcn-amd-amdhsa --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck --check-prefix=OFFLOAD-ARCH-MIX %s
 
-// OFFLOAD-ARCH-MIX: error: option '--offload-arch' cannot be specified with '--offload'
+// OFFLOAD-ARCH-MIX: error: option '--offload' cannot be specified with '--offload-arch'
diff --git a/clang/test/Preprocessor/cuda-preprocess.cu b/clang/test/Preprocessor/cuda-preprocess.cu
index da40c4c69fb34a..11e348952ca7ea 100644
--- a/clang/test/Preprocessor/cuda-preprocess.cu
+++ b/clang/test/Preprocessor/cuda-preprocess.cu
@@ -18,14 +18,14 @@ clang_unittest_cuda_arch __CUDA_ARCH__
 // RUN:   | FileCheck -check-prefix NOARCH %s
 // NOARCH: clang_unittest_no_arch
 
-// RUN: %clang -E -target x86_64-linux-gnu --cuda-gpu-arch=sm_20 --cuda-device-only -nocudainc -nocudalib %s 2>&1 \
-// RUN:   | FileCheck -check-prefix SM20 %s
-// SM20: clang_unittest_cuda_arch 200
-
 // RUN: %clang -E -target x86_64-linux-gnu --cuda-gpu-arch=sm_30 --cuda-device-only -nocudainc -nocudalib %s 2>&1 \
 // RUN:   | FileCheck -check-prefix SM30 %s
 // SM30: clang_unittest_cuda_arch 300
 
+// RUN: %clang -E -target x86_64-linux-gnu --cuda-gpu-arch=sm_20 --cuda-device-only -nocudainc -nocudalib %s 2>&1 \
+// RUN:   | FileCheck -check-prefix SM20 %s
+// SM20: clang_unittest_cuda_arch 200
+
 // RUN: %clang -E -target x86_64-linux-gnu --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \
 // RUN:   --cuda-device-only -nocudainc -nocudalib %s 2>&1 \
 // RUN:   | FileCheck -check-prefix SM20 -check-prefix SM30 %s
diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp
index 2e3da2cd2a7013..96580865833fdb 100644
--- a/clang/unittests/Tooling/ToolingTest.cpp
+++ b/clang/unittests/Tooling/ToolingTest.cpp
@@ -421,9 +421,9 @@ TEST_F(CommandLineExtractorTest, AcceptOffloadingCompile) {
 TEST_F(CommandLineExtractorTest, AcceptOffloadingSyntaxOnly) {
   addFile("test.c", "int main() {}\n");
   const char *Args[] = {
-      "clang",         "-target",   "arm64-apple-macosx11.0.0",
-      "-fsyntax-only", "-x",        "hip",
-      "test.c",        "-nogpulib", "-nogpuinc"};
+      "clang",     "-target",  "arm64-apple-macosx11.0.0", "-fsyntax-only",
+      "-x",        "hip",      "--no-offload-new-driver",  "test.c",
+      "-nogpulib", "-nogpuinc"};
   EXPECT_NE(extractCC1Arguments(Args), nullptr);
 }
 



More information about the cfe-commits mailing list