[clang] [llvm] [Offload] Move HIP and CUDA to new driver by default (PR #84420)

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 19 13:59:54 PST 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/84420

>From 90a6145e2bc22ed511a11306307488a525f3738f Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 7 Mar 2024 15:48:00 -0600
Subject: [PATCH] [Offload] Move HIP and CUDA to new driver by default

Summary:
This patch updates the `--offload-new-driver` flag to be default for all
current offloading languages. This mostly just required updating a lot
of tests to use the old format. I tried to update them where possible,
but some were directly checking the old format.

This is not intended to be landed immediately, but to allow for greater
testing. One potential issue I've discovered is the lack of SPIR-V
support or handling for `--offload`.
---
 clang/lib/Driver/Driver.cpp                   |  8 +++---
 clang/lib/Driver/ToolChains/Clang.cpp         | 22 ++++++++--------
 clang/test/Driver/cl-offload.cu               |  5 ++--
 clang/test/Driver/cuda-arch-translation.cu    | 26 +++++++++----------
 clang/test/Driver/cuda-bindings.cu            | 24 ++++++++---------
 clang/test/Driver/cuda-options.cu             | 23 ++++++++--------
 clang/test/Driver/cuda-output-asm.cu          |  4 ---
 clang/test/Driver/hip-gz-options.hip          |  1 -
 clang/test/Driver/hip-invalid-target-id.hip   |  4 +--
 clang/test/Driver/hip-macros.hip              |  3 ---
 clang/test/Driver/hip-offload-arch.hip        |  4 +--
 clang/test/Driver/hip-options.hip             |  6 +----
 clang/test/Driver/hip-sanitize-options.hip    |  2 +-
 clang/test/Driver/hip-save-temps.hip          | 12 ++++-----
 .../test/Driver/hip-toolchain-device-only.hip |  4 ---
 clang/test/Driver/hip-toolchain-mllvm.hip     |  2 --
 clang/test/Driver/hip-toolchain-no-rdc.hip    |  2 +-
 clang/test/Driver/invalid-offload-options.cpp |  2 +-
 .../ClangLinkerWrapper.cpp                    |  9 +++++--
 clang/unittests/Tooling/ToolingTest.cpp       |  6 ++---
 llvm/lib/Object/OffloadBinary.cpp             | 13 +++++++---
 21 files changed, 86 insertions(+), 96 deletions(-)

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index a0f4329e36136b..87ca203360d53e 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4182,11 +4182,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
   handleArguments(C, Args, Inputs, Actions);
 
   bool UseNewOffloadingDriver =
-      C.isOffloadingHostKind(Action::OFK_OpenMP) ||
-      Args.hasFlag(options::OPT_foffload_via_llvm,
-                   options::OPT_fno_offload_via_llvm, false) ||
+      C.getActiveOffloadKinds() != Action::OFK_None &&
       Args.hasFlag(options::OPT_offload_new_driver,
-                   options::OPT_no_offload_new_driver, false);
+                   options::OPT_no_offload_new_driver, true);
 
   // Builder to be used to build offloading actions.
   std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -4912,7 +4910,7 @@ Action *Driver::ConstructPhaseAction(
                    offloadDeviceOnly() ||
                    (TargetDeviceOffloadKind == Action::OFK_HIP &&
                     !Args.hasFlag(options::OPT_offload_new_driver,
-                                  options::OPT_no_offload_new_driver, false)))
+                                  options::OPT_no_offload_new_driver, true)))
               ? types::TY_LLVM_IR
               : types::TY_LLVM_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d3eec9fea0d498..f8b6d0c010ac9a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5044,8 +5044,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool IsHostOffloadingAction =
       JA.isHostOffloading(Action::OFK_OpenMP) ||
       (JA.isHostOffloading(C.getActiveOffloadKinds()) &&
+       C.getActiveOffloadKinds() != Action::OFK_None &&
        Args.hasFlag(options::OPT_offload_new_driver,
-                    options::OPT_no_offload_new_driver, false));
+                    options::OPT_no_offload_new_driver, true));
 
   bool IsRDCMode =
       Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
@@ -5377,7 +5378,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     if (IsUsingLTO) {
       if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
           !Args.hasFlag(options::OPT_offload_new_driver,
-                        options::OPT_no_offload_new_driver, false) &&
+                        options::OPT_no_offload_new_driver, true) &&
           !Triple.isAMDGPU()) {
         D.Diag(diag::err_drv_unsupported_opt_for_target)
             << Args.getLastArg(options::OPT_foffload_lto,
@@ -6846,16 +6847,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     Args.addOptOutFlag(CmdArgs, options::OPT_fopenmp_extensions,
                        options::OPT_fno_openmp_extensions);
   }
-  // Forward the offload runtime change to code generation, liboffload implies
-  // new driver. Otherwise, check if we should forward the new driver to change
-  // offloading code generation.
-  if (Args.hasFlag(options::OPT_foffload_via_llvm,
-                   options::OPT_fno_offload_via_llvm, false)) {
-    CmdArgs.append({"--offload-new-driver", "-foffload-via-llvm"});
-  } else if (Args.hasFlag(options::OPT_offload_new_driver,
-                          options::OPT_no_offload_new_driver, false)) {
+
+  // Forward the new driver to change offloading code generation.
+  if (C.getActiveOffloadKinds() != Action::OFK_None &&
+      Args.hasFlag(options::OPT_offload_new_driver,
+                   options::OPT_no_offload_new_driver, true))
     CmdArgs.push_back("--offload-new-driver");
-  }
+  if (Args.hasFlag(options::OPT_foffload_via_llvm,
+                   options::OPT_fno_offload_via_llvm, false))
+    CmdArgs.push_back("-foffload-via-llvm");
 
   const XRayArgs &XRay = TC.getXRayArgs();
   XRay.addArgs(TC, Args, CmdArgs, InputType);
diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
index b05bf3b97b7eb7..8f1200f1733597 100644
--- a/clang/test/Driver/cl-offload.cu
+++ b/clang/test/Driver/cl-offload.cu
@@ -18,11 +18,10 @@
 // CUDA-SAME: "-Weverything"
 // CUDA: link
 
-// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
-// HIP-SAME: "-Weverything"
 // HIP: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-pc-windows-msvc"
 // HIP-SAME: "-Weverything"
-// HIP: {{lld.* "-flavor" "gnu" "-m" "elf64_amdgpu"}}
+// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
+// HIP-SAME: "-Weverything"
 // HIP: {{link.* "amdhip64.lib"}}
 
 // CMake uses this option when finding packages for HIP, so
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index e96191cc9d4183..a0ae16452692bf 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -68,19 +68,19 @@
 
 // HIP: clang-offload-bundler
 
-// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
-// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
-// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30
-// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32
-// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35
-// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37
-// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
-// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
-// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
-// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
-// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
-// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
-// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
+// SM20:--image=profile=sm_20{{.*}}
+// SM21:--image=profile=sm_21{{.*}}
+// SM30:--image=profile=sm_30{{.*}}
+// SM32:--image=profile=sm_32{{.*}}
+// SM35:--image=profile=sm_35{{.*}}
+// SM37:--image=profile=sm_37{{.*}}
+// SM50:--image=profile=sm_50{{.*}}
+// SM52:--image=profile=sm_52{{.*}}
+// SM53:--image=profile=sm_53{{.*}}
+// SM60:--image=profile=sm_60{{.*}}
+// SM61:--image=profile=sm_61{{.*}}
+// SM62:--image=profile=sm_62{{.*}}
+// SM70:--image=profile=sm_70{{.*}}
 // GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600
 // GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601
 // GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602
diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu
index 8ee1884936c069..e36198898e5828 100644
--- a/clang/test/Driver/cuda-bindings.cu
+++ b/clang/test/Driver/cuda-bindings.cu
@@ -23,14 +23,14 @@
 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
-// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
 
 //
 // Test single gpu architecture up to the assemble phase.
 //
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefix=ASM %s
-// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
+// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[BINDINGS:.+.s]]"
 // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 
 //
@@ -61,8 +61,8 @@
 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
-// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
-// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
+// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
+// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out"
 
 // .. same, but with -fsyntax-only
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
@@ -78,9 +78,9 @@
 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
 // RUN: | FileCheck -check-prefix=SYN %s
 // SYN-NOT: inputs:
-// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
-// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+//      SYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+// SYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 // SYN-NOT: inputs
 
 // .. and with --offload-new-driver
@@ -97,7 +97,7 @@
 // RUN:        --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
 // RUN: | FileCheck -check-prefix=NDSYN %s
 // NDSYN-NOT: inputs:
-// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+//      NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NOT: inputs:
@@ -109,8 +109,8 @@
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefix=ASM2 %s
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+.s]]"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+.s]]"
 // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 
 //
@@ -125,7 +125,7 @@
 // RUN: | FileCheck -check-prefix=HBIN %s
 // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // HBIN-NOT: cuda-bindings-device-cuda-nvptx64
-// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
 
 //
 // Test one or more gpu architecture up to the assemble phase in host-only
@@ -163,7 +163,7 @@
 // Test two gpu architectures with complete compilation in device-only
 // compilation mode.
 //
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
 // RUN: | FileCheck -check-prefix=DBIN2 %s
 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
@@ -177,7 +177,7 @@
 // Test two gpu architectures up to the assemble phase in device-only
 // compilation mode.
 //
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
 // RUN: | FileCheck -check-prefix=DASM2 %s
 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index 67facf77f6c68a..34d173b398eb9c 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -2,13 +2,13 @@
 
 // Simple compilation case. Compile device-side to PTX assembly and make sure
 // we use it on the host side.
-// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix NOLINK %s
 
 // Typical compilation + link case.
-// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
@@ -33,7 +33,7 @@
 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
-// RUN:    --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
+// RUN:    --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
 
@@ -48,27 +48,27 @@
 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
-// RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN:   --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
-// RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN:   --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
 
 // Verify that --cuda-gpu-arch option passes the correct GPU architecture to
 // device compilation.
-// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix DEVICE-SM52 -check-prefix HOST \
 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
 
 // Verify that there is one device-side compilation per --cuda-gpu-arch args
 // and that all results are included on the host side.
-// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
 // RUN:             -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
@@ -128,9 +128,9 @@
 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
-// RUN:   --no-cuda-gpu-arch=all \
+// RUN:   --no-cuda-version-check --no-cuda-gpu-arch=all \
 // RUN:   --cuda-gpu-arch=sm_70 \
-// RUN:   -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN:   -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
 
 // g) There's no --cuda-gpu-arch=all
@@ -141,7 +141,7 @@
 
 
 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
-// a) by default we're including PTX for all GPUs.
+// a) by default we're not including PTX for all GPUs.
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 // RUN:   -c %s 2>&1 \
@@ -183,7 +183,8 @@
 // Verify -flto=thin -fwhole-program-vtables handling. This should result in
 // both options being passed to the host compilation, with neither passed to
 // the device compilation.
-// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
+// RUN:   -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
 // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 
diff --git a/clang/test/Driver/cuda-output-asm.cu b/clang/test/Driver/cuda-output-asm.cu
index 6b944d18917247..9d5b86bcbc1b46 100644
--- a/clang/test/Driver/cuda-output-asm.cu
+++ b/clang/test/Driver/cuda-output-asm.cu
@@ -17,13 +17,9 @@
 // SM30-DAG: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // SM30-same: "-target-cpu" "sm_30"
 
-// RUN: not %clang -### -S --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
 // RUN: not %clang -### -S --target=x86_64-linux-gnu --cuda-device-only \
 // RUN:   --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \
 // RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
-// RUN: not %clang -### -emit-llvm -c --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
 // MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files
 // Make sure we do not get duplicate diagnostics.
 // MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files
diff --git a/clang/test/Driver/hip-gz-options.hip b/clang/test/Driver/hip-gz-options.hip
index 7425d5fa847b3f..7bce8d5f66eebb 100644
--- a/clang/test/Driver/hip-gz-options.hip
+++ b/clang/test/Driver/hip-gz-options.hip
@@ -11,4 +11,3 @@
 // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
 // CHECK-DAG: {{".*lld" .* "--compress-debug-sections=zlib"}}
 // CHECK-DAG: {{".*clang.*" .* "--compress-debug-sections=zlib"}}
-// CHECK: "--compress-debug-sections=zlib"
diff --git a/clang/test/Driver/hip-invalid-target-id.hip b/clang/test/Driver/hip-invalid-target-id.hip
index 555043facb2a35..94e6c4b8bfe0aa 100644
--- a/clang/test/Driver/hip-invalid-target-id.hip
+++ b/clang/test/Driver/hip-invalid-target-id.hip
@@ -4,7 +4,7 @@
 // RUN:   --rocm-path=%S/Inputs/rocm \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
 
-// NOPLUS: error: invalid target ID 'gfx908xnack'
+// NOPLUS: error: unsupported HIP gpu architecture: gfx908xnack
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip --offload-arch=gfx900 \
@@ -55,7 +55,7 @@
 // RUN:   --rocm-path=%S/Inputs/rocm \
 // RUN:   %s 2>&1 | FileCheck -check-prefix=NOCOLON %s
 
-// NOCOLON: error: invalid target ID 'gfx900+xnack'
+// NOCOLON: error: unsupported HIP gpu architecture: gfx900+xnack
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip --offload-arch=gfx908 \
diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip
index 3b3afba0b18ca3..36e0f71bd6eff6 100644
--- a/clang/test/Driver/hip-macros.hip
+++ b/clang/test/Driver/hip-macros.hip
@@ -73,8 +73,6 @@
 // RUN: %clang -E -dM --offload-arch=gfx940 --cuda-device-only -nogpuinc -nogpulib \
 // RUN:   %s 2>&1 | FileCheck --check-prefixes=NOPTS %s
 // PTS-DAG: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__ 1
-// PTS-DAG: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__ 1
-// PTS-DAG: #define HIP_API_PER_THREAD_DEFAULT_STREAM 1
 // PTS-DAG: #define HIP_API_PER_THREAD_DEFAULT_STREAM 1
 // NOPTS-NOT: #define __HIP_API_PER_THREAD_DEFAULT_STREAM__
 // NOPTS-NOT: #define HIP_API_PER_THREAD_DEFAULT_STREAM
@@ -85,4 +83,3 @@
 // RUN:   %s 2>&1 | FileCheck --check-prefix=APPROX %s
 // NOAPPROX-NOT: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__
 // APPROX: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__ 1
-// APPROX: #define __CLANG_GPU_APPROX_TRANSCENDENTALS__ 1
diff --git a/clang/test/Driver/hip-offload-arch.hip b/clang/test/Driver/hip-offload-arch.hip
index dd65a0e103ec69..1af53baf63da74 100644
--- a/clang/test/Driver/hip-offload-arch.hip
+++ b/clang/test/Driver/hip-offload-arch.hip
@@ -4,5 +4,5 @@
 // RUN:   -nogpuinc -nogpulib \
 // RUN:   %s 2>&1 | FileCheck %s
 
-// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1030"}}
-// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1031"}}
+// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030"
+// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1031"
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index 8c13137735fb91..35db01029feca9 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -83,10 +83,6 @@
 // RUN:   --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
 
-// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
-// RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
-
 // Ensure we don't error about -fwhole-program-vtables for the non-device offload compile.
 // HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
@@ -122,7 +118,7 @@
 
 // Check -Xoffload-linker option is passed to lld.
 
-// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --no-offload-new-driver \
 // RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -Xoffload-linker --build-id=md5 %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=OFL-LINK %s
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
diff --git a/clang/test/Driver/hip-sanitize-options.hip b/clang/test/Driver/hip-sanitize-options.hip
index d94cbdacdaeb3a..da3766790376d5 100644
--- a/clang/test/Driver/hip-sanitize-options.hip
+++ b/clang/test/Driver/hip-sanitize-options.hip
@@ -56,8 +56,8 @@
 // NORDC-NOT: {{"[^"]*lld(\.exe){0,1}".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}}
 // NORDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 
-// RDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 // RDC: {{"[^"]*clang[^"]*".* "-emit-llvm-bc".* "-fcuda-is-device".* "-mlink-bitcode-file" ".*asanrtl.bc".* "-mlink-builtin-bitcode" ".*hip.bc".* "-fsanitize=address".*}} "-o" "[[OUT:[^"]*.bc]]"
+// RDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}
 // RDC-NOT: {{"[^"]*lld(\.exe){0,1}".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}}
 
 // FAIL: AMDGPU address sanitizer runtime library (asanrtl) is not found. Please install ROCm device library which supports address sanitizer
diff --git a/clang/test/Driver/hip-save-temps.hip b/clang/test/Driver/hip-save-temps.hip
index 142c3f1611a360..2e8489f65d7e0b 100644
--- a/clang/test/Driver/hip-save-temps.hip
+++ b/clang/test/Driver/hip-save-temps.hip
@@ -1,31 +1,31 @@
 // -fno-gpu-rdc without -o with -c
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc --offload-arch=gfx900 -c %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc --offload-arch=gfx900 -c %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC %s
 
 // -fno-gpu-rdc without -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC,NOUT %s
 
 // -fno-gpu-rdc with -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -o executable --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -o executable --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,NORDC,WOUT %s
 
 // -fgpu-rdc without -o with -c
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -fgpu-rdc --offload-arch=gfx900 -c %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -fgpu-rdc --offload-arch=gfx900 -c %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,RDC,RDCC %s
 
 // -fgpu-rdc without -o
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// RUN:   -nogpuinc -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
+// RUN:   --no-offload-new-driver -nogpuinc -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
 // RUN:   FileCheck -check-prefixes=CHECK,RDC,RDCL,NOUT %s
 
 // -fgpu-rdc with -o
 // UN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
-// UN:   -nogpuinc -o executable -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
+// UN:   --offload-new-driver -nogpuinc -o executable -fgpu-rdc --offload-arch=gfx900 %s 2>&1 | \
 // UN:   FileCheck -check-prefixes=CHECK,RDC,RDCL,WOUT %s
 
 // -fgpu-rdc host object path
diff --git a/clang/test/Driver/hip-toolchain-device-only.hip b/clang/test/Driver/hip-toolchain-device-only.hip
index 12097819f66888..c0621854f17cea 100644
--- a/clang/test/Driver/hip-toolchain-device-only.hip
+++ b/clang/test/Driver/hip-toolchain-device-only.hip
@@ -21,7 +21,3 @@
 
 // CHECK: [[LLD]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]
-
-// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
-// CHECK-SAME: "-targets={{.*}},hip{{.*}}-amdgcn-amd-amdhsa--gfx803,hip{{.*}}-amdgcn-amd-amdhsa--gfx900"
-// CHECK-SAME: "-input={{.*}}" "-input=[[IMG_DEV_A_803]]" "-input=[[IMG_DEV_A_900]]" "-output=[[BUNDLE_A:.*hipfb]]"
diff --git a/clang/test/Driver/hip-toolchain-mllvm.hip b/clang/test/Driver/hip-toolchain-mllvm.hip
index 33018cc398915b..bedb053b9006cc 100644
--- a/clang/test/Driver/hip-toolchain-mllvm.hip
+++ b/clang/test/Driver/hip-toolchain-mllvm.hip
@@ -30,13 +30,11 @@
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: {{.*}} "-target-cpu" "gfx803"
 // CHECK-SAME: {{.*}} "-mllvm" "-unroll-count=10" {{.*}}
-// CHECK: [[LLD:".*lld.*"]] {{.*}}"-m" "elf64_amdgpu"{{.*}} "-plugin-opt=-unroll-count=10"{{.*}} "-plugin-opt=-inline-threshold=100"
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: {{.*}} "-target-cpu" "gfx900"
 // CHECK-SAME: {{.*}} "-mllvm" "-unroll-count=10" {{.*}}
-// CHECK: [[LLD:".*lld.*"]] {{.*}} "-plugin-opt=-unroll-count=10"{{.*}} "-plugin-opt=-inline-threshold=100"
 
 // NEG-NOT: {{".*opt"}}
 // NEG-NOT: {{".*llc"}}
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip
index 4a91c9dbe7570e..e0591c2b4d5335 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -36,7 +36,7 @@
 // RUN:   %t/a.o %t/b.o \
 // RUN: 2>&1 | FileCheck -check-prefixes=LKONLY %s
 
-// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: %clang -### --target=x86_64-linux-gnu --no-offload-new-driver \
 // RUN:   --offload-arch=amdgcnspirv --offload-arch=gfx900 \
 // RUN:   %s -nogpuinc -nogpulib \
 // RUN: 2>&1 | FileCheck -check-prefixes=AMDGCNSPIRV %s
diff --git a/clang/test/Driver/invalid-offload-options.cpp b/clang/test/Driver/invalid-offload-options.cpp
index 48d5310538a3cf..a0b7f1bdbd3981 100644
--- a/clang/test/Driver/invalid-offload-options.cpp
+++ b/clang/test/Driver/invalid-offload-options.cpp
@@ -26,4 +26,4 @@
 // RUN:   --offload=amdgcn-amd-amdhsa --offload-arch=gfx900 %s \
 // RUN: 2>&1 | FileCheck --check-prefix=OFFLOAD-ARCH-MIX %s
 
-// OFFLOAD-ARCH-MIX: error: option '--offload-arch' cannot be specified with '--offload'
+// OFFLOAD-ARCH-MIX: error: option '--offload' cannot be specified with '--offload-arch'
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index ebafd7eb7774ec..58d13e284f7cde 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1220,8 +1220,7 @@ getDeviceInput(const ArgList &Args) {
             : std::string(Arg->getValue());
 
     if (!Filename && Arg->getOption().matches(OPT_library))
-      reportError(
-          createStringError("unable to find library -l%s", Arg->getValue()));
+      return createStringError("unable to find library -l%s", Arg->getValue());
 
     if (!Filename || !sys::fs::exists(*Filename) ||
         sys::fs::is_directory(*Filename))
@@ -1233,6 +1232,12 @@ getDeviceInput(const ArgList &Args) {
       return createFileError(*Filename, EC);
 
     MemoryBufferRef Buffer = **BufferOrErr;
+    if (identify_magic(Buffer.getBuffer()) == file_magic::offload_bundle ||
+        identify_magic(Buffer.getBuffer()) ==
+            file_magic::offload_bundle_compressed)
+      return createStringError(
+          "clang offload bundles are deprecated. Recompile with "
+          "'--no-offload-new-driver'");
     if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object)
       continue;
 
diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp
index f41a44fa0922a1..61c84890408f67 100644
--- a/clang/unittests/Tooling/ToolingTest.cpp
+++ b/clang/unittests/Tooling/ToolingTest.cpp
@@ -421,9 +421,9 @@ TEST_F(CommandLineExtractorTest, AcceptOffloadingCompile) {
 TEST_F(CommandLineExtractorTest, AcceptOffloadingSyntaxOnly) {
   addFile("test.c", "int main() {}\n");
   const char *Args[] = {
-      "clang",         "-target",   "arm64-apple-macosx11.0.0",
-      "-fsyntax-only", "-x",        "hip",
-      "test.c",        "-nogpulib", "-nogpuinc"};
+      "clang",     "-target",  "arm64-apple-macosx11.0.0", "-fsyntax-only",
+      "-x",        "hip",      "--no-offload-new-driver",  "test.c",
+      "-nogpulib", "-nogpuinc"};
   EXPECT_NE(extractCC1Arguments(Args), nullptr);
 }
 
diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp
index 89dc12551494fd..d7f4ee4644a2f0 100644
--- a/llvm/lib/Object/OffloadBinary.cpp
+++ b/llvm/lib/Object/OffloadBinary.cpp
@@ -72,6 +72,15 @@ Error extractFromObject(const ObjectFile &Obj,
   assert((Obj.isELF() || Obj.isCOFF()) && "Invalid file type");
 
   for (SectionRef Sec : Obj.sections()) {
+    Expected<StringRef> NameOrErr = Sec.getName();
+    if (!NameOrErr)
+      return NameOrErr.takeError();
+
+    if (NameOrErr->starts_with("__CLANG_OFFLOAD_BUNDLE__"))
+      return createStringError(
+          "clang offload bundles are deprecated. Recompile with "
+          "'--no-offload-new-driver'");
+
     // ELF files contain a section with the LLVM_OFFLOADING type.
     if (Obj.isELF() &&
         static_cast<ELFSectionRef>(Sec).getType() != ELF::SHT_LLVM_OFFLOADING)
@@ -79,10 +88,6 @@ Error extractFromObject(const ObjectFile &Obj,
 
     // COFF has no section types so we rely on the name of the section.
     if (Obj.isCOFF()) {
-      Expected<StringRef> NameOrErr = Sec.getName();
-      if (!NameOrErr)
-        return NameOrErr.takeError();
-
       if (!NameOrErr->starts_with(".llvm.offloading"))
         continue;
     }



More information about the cfe-commits mailing list